antonjeran committed on Nov 20, 2022

Commit

b53f252

1 Parent(s): fd529ef

Upload 40 files

Browse files

Files changed (41) hide show

.gitattributes +1 -0
LICENSE +661 -0
Poster.pdf +0 -0
README.md +133 -3
code_new/RT60.py +131 -0
code_new/RTS.py +87 -0
code_new/__init__py +2 -0
code_new/__pycache__/RT60.cpython-36.pyc +0 -0
code_new/__pycache__/RT60.cpython-38.pyc +0 -0
code_new/__pycache__/RTS.cpython-38.pyc +0 -0
code_new/__pycache__/model.cpython-36.pyc +0 -0
code_new/__pycache__/model.cpython-38.pyc +0 -0
code_new/__pycache__/trainer.cpython-36.pyc +0 -0
code_new/__pycache__/trainer.cpython-38.pyc +0 -0
code_new/cfg/RIR_eval.yml +25 -0
code_new/cfg/RIR_s1.yml +32 -0
code_new/cfg/RIR_s1_temp.yml +32 -0
code_new/main.py +72 -0
code_new/miscc/__init__.py +2 -0
code_new/miscc/__init__.pyc +0 -0
code_new/miscc/__pycache__/__init__.cpython-36.pyc +0 -0
code_new/miscc/__pycache__/__init__.cpython-38.pyc +0 -0
code_new/miscc/__pycache__/config.cpython-36.pyc +0 -0
code_new/miscc/__pycache__/config.cpython-38.pyc +0 -0
code_new/miscc/__pycache__/datasets.cpython-36.pyc +0 -0
code_new/miscc/__pycache__/datasets.cpython-38.pyc +0 -0
code_new/miscc/__pycache__/utils.cpython-36.pyc +0 -0
code_new/miscc/__pycache__/utils.cpython-38.pyc +0 -0
code_new/miscc/config.py +97 -0
code_new/miscc/config.pyc +0 -0
code_new/miscc/datasets.py +113 -0
code_new/miscc/datasets.pyc +0 -0
code_new/miscc/utils.py +239 -0
code_new/miscc/utils.pyc +0 -0
code_new/model.py +413 -0
code_new/single_copy.py +46 -0
code_new/trainer.py +392 -0
download_data.sh +3 -0
download_generate.sh +2 -0
example1.py +20 -0
slides.pptx +3 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+slides.pptx filter=lfs diff=lfs merge=lfs -text

LICENSE ADDED Viewed

@@ -0,0 +1,661 @@

+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                       TERMS AND CONDITIONS
+  0. Definitions.
+  "This License" refers to version 3 of the GNU Affero General Public License.
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+  1. Source Code.
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+  The Corresponding Source for a work in source code form is that
+same work.
+  2. Basic Permissions.
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+  4. Conveying Verbatim Copies.
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+  5. Conveying Modified Source Versions.
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+  6. Conveying Non-Source Forms.
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+  7. Additional Terms.
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+  8. Termination.
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+  9. Acceptance Not Required for Having Copies.
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+  10. Automatic Licensing of Downstream Recipients.
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+  11. Patents.
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+  12. No Surrender of Others' Freedom.
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+  13. Remote Network Interaction; Use with the GNU General Public License.
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+  14. Revised Versions of this License.
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+  15. Disclaimer of Warranty.
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  16. Limitation of Liability.
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+  17. Interpretation of Sections 15 and 16.
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+Also add information on how to contact you by electronic and paper mail.
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.

Poster.pdf ADDED Viewed

Binary file (740 kB). View file

README.md CHANGED Viewed

@@ -1,3 +1,133 @@
----
-license: cc-by-nc-4.0
----

+# FAST-RIR: FAST NEURAL DIFFUSE ROOM IMPULSE RESPONSE GENERATOR (ICASSP 2022)
+This is the official implementation of our neural-network-based  fast  diffuse  room  impulse  response  generator ([**FAST-RIR**](https://arxiv.org/pdf/2110.04057.pdf))  for  generating  room impulse responses (RIRs) for a given rectangular acoustic environment. Our model is inspired by [**StackGAN**](https://github.com/hanzhanggit/StackGAN-Pytorch) architecture. The audio examples and spectrograms of the generated RIRs are available [here](https://anton-jeran.github.io/FRIR/).
+**NEWS: We have generalized our FAST-RIR to generate RIRs for any 3D indoor scene represented using meshes. The official code of our network [**MESH2IR**](https://anton-jeran.github.io/M2IR/) is available.**
+## Requirements
+```
+Python3.6
+Pytorch
+python-dateutil
+easydict
+pandas
+torchfile
+gdown
+librosa
+soundfile
+acoustics
+wavefile
+wavfile
+pyyaml==5.4.1
+pickle
+```
+## Embedding
+Each normalized embedding is created as follows. If you are using our trained model, you may also need to apply the extra Correction (CRR) parameter.
+```
+Listener Position = LP
+Source Position = SP
+Room Dimension = RD
+Reverberation Time = T60
+Correction = CRR
+CRR = 0.1 if 0.5<T60<0.6
+CRR = 0.2 if T60>0.6
+CRR = 0 otherwise
+Embedding = ([LP_X,LP_Y,LP_Z,SP_X,SP_Y,SP_Z,RD_X,RD_Y,RD_Z,(T60+CRR)] /5) - 1
+```
+## Generate RIRs using trained model
+Download the trained model using this command
+```
+source download_generate.sh
+```
+Create a list of normalized embeddings in pickle format. You can run the following command to generate an example embedding list
+```
+ python3 example1.py
+```
+Run the following command inside **code_new** to generate RIRs corresponding to the normalized embeddings list. You can find generated RIRs inside **code_new/Generated_RIRs**
+```
+python3 main.py --cfg cfg/RIR_eval.yml --gpu 0
+```
+## Range
+Our trained NN-DAS is capable of generating RIRs with the following range accurately.
+```
+Room Dimension X --> 8m to 11m
+Room Dimension Y --> 6m to 8m
+Room Dimension Z --> 2.5m to 3.5m
+Listener Position --> Any position within the room
+Speaker Position --> Any position within the room
+Reverberation time --> 0.2s to 0.7s
+```
+## Training the Model
+Run the following command to download the training dataset we created using a [**Diffuse Acoustic Simulator**](https://github.com/GAMMA-UMD/pygsound). You also can train the model using your dataset.
+```
+source download_data.sh
+```
+Run the following command to train the model. You can select which GPUs are used for training with the `--gpu` argument. In this example, I am using 2 GPUs.
+```
+python3 main.py --cfg cfg/RIR_s1.yml --gpu 0,1
+```
+## Related Works
+1) [**IR-GAN: Room Impulse Response Generator for Far-field Speech Recognition (INTERSPEECH2021)**](https://github.com/anton-jeran/IR-GAN)
+2) [**TS-RIR: Translated synthetic room impulse responses for speech augmentation (IEEE ASRU 2021)**](https://github.com/GAMMA-UMD/TS-RIR)
+## Citations
+If you use our **FAST-RIR** for your research, please consider citing
+```
+@INPROCEEDINGS{9747846,
+author={Ratnarajah, Anton and Zhang, Shi-Xiong and Yu, Meng and Tang, Zhenyu and Manocha, Dinesh and Yu, Dong},
+booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+title={Fast-Rir: Fast Neural Diffuse Room Impulse Response Generator},
+year={2022},
+volume={},
+number={},
+pages={571-575},
+doi={10.1109/ICASSP43922.2022.9747846}}
+```
+Our work is inspired by
+```
+@inproceedings{han2017stackgan,
+Author = {Han Zhang and Tao Xu and Hongsheng Li and Shaoting Zhang and Xiaogang Wang and Xiaolei Huang and Dimitris Metaxas},
+Title = {StackGAN: Text to Photo-realistic Image Synthesis with Stacked Generative Adversarial Networks},
+Year = {2017},
+booktitle = {{ICCV}},
+}
+```
+If you use our training dataset generated using [**Diffuse Acoustic Simulator**](https://github.com/GAMMA-UMD/pygsound) in your research, please consider citing
+```
+@inproceedings{9052932,
+  author={Z. {Tang} and L. {Chen} and B. {Wu} and D. {Yu} and D. {Manocha}},
+  booktitle={ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+  title={Improving Reverberant Speech Training Using Diffuse Acoustic Simulation},
+  year={2020},
+  volume={},
+  number={},
+  pages={6969-6973},
+}
+```

code_new/RT60.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import numpy as np
+import math
+from scipy.io import wavfile
+from scipy import stats
+from acoustics.utils import _is_1d
+from acoustics.signal import bandpass
+from acoustics.bands import (_check_band_type, octave_low, octave_high, third_low, third_high)
+import soundfile as sf
+from multiprocessing import Pool
+def t60_impulse(raw_signal,fs):  # pylint: disable=too-many-locals
+    """
+    Reverberation time from a WAV impulse response.
+    :param file_name: name of the WAV file containing the impulse response.
+    :param bands: Octave or third bands as NumPy array.
+    :param rt: Reverberation time estimator. It accepts `'t30'`, `'t20'`, `'t10'` and `'edt'`.
+    :returns: Reverberation time :math:`T_{60}`
+    """
+    bands =np.array([62.5 ,125, 250, 500,1000, 2000])
+    if np.max(raw_signal)==0 and np.min(raw_signal)==0:
+        print('came 1')
+        return .5
+    # fs, raw_signal = wavfile.read(file_name)
+    band_type = _check_band_type(bands)
+    # if band_type == 'octave':
+    low = octave_low(bands[0], bands[-1])
+    high = octave_high(bands[0], bands[-1])
+    # elif band_type == 'third':
+    #     low = third_low(bands[0], bands[-1])
+    #     high = third_high(bands[0], bands[-1])
+    init = -0.0
+    end = -60.0
+    factor = 1.0
+    bands =bands[3:5]
+    low = low[3:5]
+    high = high[3:5]
+    t60 = np.zeros(bands.size)
+    for band in range(bands.size):
+        # Filtering signal
+        filtered_signal = bandpass(raw_signal, low[band], high[band], fs, order=8)
+        abs_signal = np.abs(filtered_signal) / np.max(np.abs(filtered_signal))
+        # Schroeder integration
+        sch = np.cumsum(abs_signal[::-1]**2)[::-1]
+        sch_db = 10.0 * np.log10(sch / np.max(sch))
+        if math.isnan(sch_db[1]):
+            print('came 2')
+            return .5
+        # print("leng sch_db ",sch_db.size)
+        # print("sch_db ",sch_db)
+        # Linear regression
+        sch_init = sch_db[np.abs(sch_db - init).argmin()]
+        sch_end = sch_db[np.abs(sch_db - end).argmin()]
+        init_sample = np.where(sch_db == sch_init)[0][0]
+        end_sample = np.where(sch_db == sch_end)[0][0]
+        x = np.arange(init_sample, end_sample + 1) / fs
+        y = sch_db[init_sample:end_sample + 1]
+        slope, intercept = stats.linregress(x, y)[0:2]
+        # Reverberation time (T30, T20, T10 or EDT)
+        db_regress_init = (init - intercept) / slope
+        db_regress_end = (end - intercept) / slope
+        t60[band] = factor * (db_regress_end - db_regress_init)
+    mean_t60 =(t60[1]+t60[0])/2
+    # print("meant60 is ", mean_t60)
+    if math.isnan(mean_t60):
+        print('came 3')
+        return .5
+    return mean_t60
+def t60_error(filename1,filename2):
+    real_wave,fs = sf.read(filename1)
+    fake_wave,fs = sf.read(filename2)
+    channel = int(real_wave.size/len(real_wave))
+    pool = Pool(processes=8)
+    results =[]
+    for n in range(channel):
+        results.append(pool.apply_async(t60_parallel, args=(n,real_wave,fake_wave,fs,)))
+    T60_error =0
+    for result in results:
+        T60_error =  T60_error + result.get()
+    T60_error = T60_error/channel
+    pool.close()
+    pool.join()
+    # T60_error = Parallel(n_jobs=64)(delayed(t60_parallel)(n, real_wave,fake_wave,fs) for n in range(channel))#np.random.randint(0,1023,size=channel))#
+    # T60_error = sum(results)/channel
+    # for n in range(channel):
+    #     real_wave_single   = real_wave[:,n]
+    #     fake_wave_single   = fake_wave[:,n]
+    #     Real_T60_val = t60_impulse(real_wave_single,fs)
+    #     Fake_T60_val = t60_impulse(fake_wave_single,fs)
+    #     T60_diff = abs(Real_T60_val-Fake_T60_val)
+    #     T60_error = T60_error + T60_diff
+    # T60_error = T60_error/channel
+    return str(T60_error)
+def t60_parallel(n,real_wave,fake_wave,fs):
+    real_wave_single   = real_wave[n,:]
+    fake_wave_single   = fake_wave[n,:]
+    Real_T60_val = t60_impulse(real_wave_single,fs)
+    Fake_T60_val = t60_impulse(fake_wave_single,fs)
+    T60_diff = abs(Real_T60_val-Fake_T60_val)
+    return T60_diff
+if __name__ == '__main__':
+    t60_impulse('/home/anton/Desktop/gamma101/data/evaluation_all/SF1/Hotel_SkalskyDvur_ConferenceRoom2-MicID01-SpkID01_20170906_S-09-RIR-IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav')

code_new/RTS.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import numpy as np
+# import librosa
+from scipy.io import wavfile
+from scipy import stats
+import soundfile as sf
+from acoustics.utils import _is_1d
+from acoustics.signal import bandpass
+from acoustics.bands import (_check_band_type, octave_low, octave_high, third_low, third_high)
+def t60_impulse(file_name):  # pylint: disable=too-many-locals
+    """
+    Reverberation time from a WAV impulse response.
+    :param file_name: name of the WAV file containing the impulse response.
+    :param bands: Octave or third bands as NumPy array.
+    :param rt: Reverberation time estimator. It accepts `'t30'`, `'t20'`, `'t10'` and `'edt'`.
+    :returns: Reverberation time :math:`T_{60}`
+    """
+    bands =np.array([62.5 ,125, 250, 500,1000, 2000])
+    fs =16000;
+    # raw_signal, _ = librosa.load(file_name, sr=fs, mono=True, duration=1)
+    # fs, raw_signal = wavfile.read(file_name)
+    raw_signal,fs = sf.read(file_name)
+    band_type = _check_band_type(bands)
+    # if band_type == 'octave':
+    low = octave_low(bands[0], bands[-1])
+    high = octave_high(bands[0], bands[-1])
+    # elif band_type == 'third':
+    #     low = third_low(bands[0], bands[-1])
+    #     high = third_high(bands[0], bands[-1])
+    init = -0.0
+    end = -60.0
+    factor = 1.0
+    bands =bands[3:5]
+    low = low[3:5]
+    high = high[3:5]
+    t60 = np.zeros(bands.size)
+    for band in range(bands.size):
+        # Filtering signal
+        filtered_signal = bandpass(raw_signal, low[band], high[band], fs, order=8)
+        abs_signal = np.abs(filtered_signal) / np.max(np.abs(filtered_signal))
+        # Schroeder integration
+        sch = np.cumsum(abs_signal[::-1]**2)[::-1]
+        sch_db = 10.0 * np.log10(sch / np.max(sch))
+        # Linear regression
+        sch_init = sch_db[np.abs(sch_db - init).argmin()]
+        sch_end = sch_db[np.abs(sch_db - end).argmin()]
+        init_sample = np.where(sch_db == sch_init)[0][0]
+        end_sample = np.where(sch_db == sch_end)[0][0]
+        x = np.arange(init_sample, end_sample + 1) / fs
+        y = sch_db[init_sample:end_sample + 1]
+        slope, intercept = stats.linregress(x, y)[0:2]
+        # Reverberation time (T30, T20, T10 or EDT)
+        db_regress_init = (init - intercept) / slope
+        db_regress_end = (end - intercept) / slope
+        t60[band] = factor * (db_regress_end - db_regress_init)
+    mean_t60 =(t60[1]+t60[0])/2
+    return mean_t60
+def t60_error(file_name1,file_name2):
+    RT_real = t60_impulse(file_name1)
+    RT_fake = t60_impulse(file_name2)
+    RT_diff = abs(RT_real-RT_fake)
+    return str(RT_diff)
+if __name__ == '__main__':
+    # Manual smoke run on a hard-coded local file; the computed T60 is discarded.
+    t60_impulse('/home/anton/Anton/data/vcc2016_training/SF1/VUT_FIT_D105-MicID01-SpkID04_20170901_S-12-RIR-IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/2.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/3.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/4.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/5.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/6.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/7.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/8.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/9.wav')
+    # t60_impulse('/home/anton/Desktop/data/vcc2016_training/SF1/10.wav')

code_new/__init__py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from __future__ import division
2	+ from __future__ import print_function

code_new/__pycache__/RT60.cpython-36.pyc ADDED Viewed

Binary file (3.13 kB). View file

code_new/__pycache__/RT60.cpython-38.pyc ADDED Viewed

Binary file (3.12 kB). View file

code_new/__pycache__/RTS.cpython-38.pyc ADDED Viewed

Binary file (2.34 kB). View file

code_new/__pycache__/model.cpython-36.pyc ADDED Viewed

Binary file (9.16 kB). View file

code_new/__pycache__/model.cpython-38.pyc ADDED Viewed

Binary file (9.05 kB). View file

code_new/__pycache__/trainer.cpython-36.pyc ADDED Viewed

Binary file (7.79 kB). View file

code_new/__pycache__/trainer.cpython-38.pyc ADDED Viewed

Binary file (7.31 kB). View file

code_new/cfg/RIR_eval.yml ADDED Viewed

	@@ -0,0 +1,25 @@

+CONFIG_NAME: 'eval'
+DATASET_NAME: 'RIR'
+EMBEDDING_TYPE: 'cnn-rnn'
+GPU_ID: '0,1'
+# Z_DIM: 100
+NET_G: '../generate/netG_epoch_242.pth'
+DATA_DIR: '../data/Medium_Room'
+EVAL_DIR: '../example1.pickle'
+WORKERS: 4
+RIRSIZE: 4096
+STAGE: 1
+TRAIN:
+    FLAG: False
+    BATCH_SIZE: 64
+GAN:
+    CONDITION_DIM: 10
+    DF_DIM: 96
+    GF_DIM: 256
+TEXT:
+    DIMENSION: 10

code_new/cfg/RIR_s1.yml ADDED Viewed

	@@ -0,0 +1,32 @@

+CONFIG_NAME: 'stageI'
+DATASET_NAME: 'RIR'
+EMBEDDING_TYPE: 'cnn-rnn'
+GPU_ID: '0,1'
+DATA_DIR: '../data/Medium_Room'
+EVAL_DIR: '../generate/embeddings/'
+RIRSIZE: 4096
+WORKERS: 4
+STAGE: 1
+TRAIN:
+    FLAG: True
+    BATCH_SIZE: 128
+    MAX_EPOCH: 2000
+    LR_DECAY_EPOCH: 40
+    SNAPSHOT_INTERVAL: 50
+    # DISCRIMINATOR_LR: 0.0002
+    # GENERATOR_LR: 0.0002
+    DISCRIMINATOR_LR: 0.00008
+    GENERATOR_LR: 0.00008
+    COEFF:
+      KL: 2.0
+GAN:
+    CONDITION_DIM: 10
+    DF_DIM: 96
+    GF_DIM: 256
+TEXT:
+    DIMENSION: 10

code_new/cfg/RIR_s1_temp.yml ADDED Viewed

	@@ -0,0 +1,32 @@

+CONFIG_NAME: 'stageI'
+DATASET_NAME: 'RIR'
+EMBEDDING_TYPE: 'cnn-rnn'
+GPU_ID: '0,1'
+DATA_DIR: '../data/Medium_Room'
+EVAL_DIR: '../generate/embeddings/'
+RIRSIZE: 4096
+WORKERS: 4
+STAGE: 1
+TRAIN:
+    FLAG: True
+    BATCH_SIZE: 128
+    MAX_EPOCH: 2000
+    LR_DECAY_EPOCH: 40
+    SNAPSHOT_INTERVAL: 50
+    # DISCRIMINATOR_LR: 0.0002
+    # GENERATOR_LR: 0.0002
+    DISCRIMINATOR_LR: 0.00008
+    GENERATOR_LR: 0.00008
+    COEFF:
+      KL: 2.0
+GAN:
+    CONDITION_DIM: 10
+    DF_DIM: 96
+    GF_DIM: 256
+TEXT:
+    DIMENSION: 10

code_new/main.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from __future__ import print_function
+import torch.backends.cudnn as cudnn
+import torch
+import torchvision.transforms as transforms
+import argparse
+import os
+import random
+import sys
+import pprint
+import datetime
+import dateutil
+import dateutil.tz
+dir_path = (os.path.abspath(os.path.join(os.path.realpath(__file__), './.')))
+sys.path.append(dir_path)
+from miscc.datasets import TextDataset
+from miscc.config import cfg, cfg_from_file
+from miscc.utils import mkdir_p
+from trainer import GANTrainer
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train a GAN network')
+    parser.add_argument('--cfg', dest='cfg_file',
+                        help='optional config file',
+                        default='birds_stage1.yml', type=str)
+    parser.add_argument('--gpu',  dest='gpu_id', type=str, default='0')
+    parser.add_argument('--data_dir', dest='data_dir', type=str, default='')
+    parser.add_argument('--manualSeed', type=int, help='manual seed')
+    args = parser.parse_args()
+    return args
+if __name__ == "__main__":
+    # Entry point: load the config, seed the RNGs, then either train or sample.
+    args = parse_args()
+    if args.cfg_file is not None:
+        cfg_from_file(args.cfg_file)
+    # NOTE(review): --gpu is parsed as a str, so this comparison with the int -1 is always true.
+    if args.gpu_id != -1:
+        cfg.GPU_ID = args.gpu_id
+    if args.data_dir != '':
+        cfg.DATA_DIR = args.data_dir
+    print('Using config:')
+    pprint.pprint(cfg)
+    # Pick a random seed when none was given, then seed Python and torch with it.
+    if args.manualSeed is None:
+        args.manualSeed = random.randint(1, 10000)
+    random.seed(args.manualSeed)
+    torch.manual_seed(args.manualSeed)
+    if cfg.CUDA:
+        torch.cuda.manual_seed_all(args.manualSeed)
+    # Output directory name combines dataset name, config name and a local timestamp.
+    now = datetime.datetime.now(dateutil.tz.tzlocal())
+    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
+    output_dir = '../output/%s_%s_%s' % \
+                 (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
+    # GPU_ID is a comma-separated list; the batch size below is scaled by its length.
+    num_gpu = len(cfg.GPU_ID.split(','))
+    if cfg.TRAIN.FLAG:
+        dataset = TextDataset(cfg.DATA_DIR, 'train',
+                              rirsize=cfg.RIRSIZE)
+        assert dataset
+        #commented for temporary
+        dataloader = torch.utils.data.DataLoader(
+            dataset, batch_size=cfg.TRAIN.BATCH_SIZE * num_gpu,
+            drop_last=True, shuffle=True, num_workers=int(cfg.WORKERS))
+        algo = GANTrainer(output_dir)
+        algo.train(dataloader, cfg.STAGE)
+    else:
+        # Evaluation mode: EVAL_DIR is handed to the trainer's sample() method.
+        file_path = cfg.EVAL_DIR
+        algo = GANTrainer(output_dir)
+        algo.sample(file_path, cfg.STAGE)

code_new/miscc/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from __future__ import division
2	+ from __future__ import print_function

code_new/miscc/__init__.pyc ADDED Viewed

Binary file (241 Bytes). View file

code_new/miscc/__pycache__/__init__.cpython-36.pyc ADDED Viewed

Binary file (218 Bytes). View file

code_new/miscc/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (243 Bytes). View file

code_new/miscc/__pycache__/config.cpython-36.pyc ADDED Viewed

Binary file (2.11 kB). View file

code_new/miscc/__pycache__/config.cpython-38.pyc ADDED Viewed

Binary file (2.13 kB). View file

code_new/miscc/__pycache__/datasets.cpython-36.pyc ADDED Viewed

Binary file (2.49 kB). View file

code_new/miscc/__pycache__/datasets.cpython-38.pyc ADDED Viewed

Binary file (2.55 kB). View file

code_new/miscc/__pycache__/utils.cpython-36.pyc ADDED Viewed

Binary file (4.37 kB). View file

code_new/miscc/__pycache__/utils.cpython-38.pyc ADDED Viewed

Binary file (4.4 kB). View file

code_new/miscc/config.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from __future__ import division
+from __future__ import print_function
+import os.path as osp
+import numpy as np
+from easydict import EasyDict as edict
+# Default configuration; cfg_from_file() merges a YAML file over these values.
+__C = edict()
+cfg = __C
+# Dataset name (the RIR configs under cfg/ override this default with 'RIR')
+__C.DATASET_NAME = 'birds'
+__C.EMBEDDING_TYPE = 'cnn-rnn'
+__C.CONFIG_NAME = ''
+# Comma-separated GPU ids, e.g. '0,1'
+__C.GPU_ID = '0'
+__C.CUDA = True
+__C.WORKERS = 6
+__C.NET_G = ''
+__C.NET_D = ''
+__C.STAGE1_G = ''
+__C.DATA_DIR = ''
+__C.EVAL_DIR = ''
+__C.VIS_COUNT = 64
+__C.Z_DIM = 100
+__C.RIRSIZE = 4096
+__C.STAGE = 1
+# Training options
+__C.TRAIN = edict()
+__C.TRAIN.FLAG = True
+__C.TRAIN.BATCH_SIZE = 64
+__C.TRAIN.MAX_EPOCH = 600
+__C.TRAIN.SNAPSHOT_INTERVAL = 50
+__C.TRAIN.PRETRAINED_MODEL = ''
+__C.TRAIN.PRETRAINED_EPOCH = 600
+__C.TRAIN.LR_DECAY_EPOCH = 600
+__C.TRAIN.DISCRIMINATOR_LR = 2e-4
+__C.TRAIN.GENERATOR_LR = 2e-4
+__C.TRAIN.COEFF = edict()
+__C.TRAIN.COEFF.KL = 2.0
+# Model options
+__C.GAN = edict()
+__C.GAN.CONDITION_DIM = 128
+__C.GAN.DF_DIM = 64
+__C.GAN.GF_DIM = 128
+__C.GAN.R_NUM = 4
+# Embedding options
+__C.TEXT = edict()
+__C.TEXT.DIMENSION = 1024
+def _merge_a_into_b(a, b):
+    """Merge config dictionary a into config dictionary b, clobbering the
+    options in b whenever they are also specified in a.
+    """
+    if type(a) is not edict:
+        return
+    for k, v in a.items():
+        # a must specify keys that are in b
+        if k not in b:
+            raise KeyError('{} is not a valid config key'.format(k))
+        # the types must match, too
+        old_type = type(b[k])
+        if old_type is not type(v):
+            if isinstance(b[k], np.ndarray):
+                v = np.array(v, dtype=b[k].dtype)
+            else:
+                raise ValueError(('Type mismatch ({} vs. {}) '
+                                  'for config key: {}').format(type(b[k]),
+                                                               type(v), k))
+        # recursively merge dicts
+        if type(v) is edict:
+            try:
+                _merge_a_into_b(a[k], b[k])
+            except:
+                print('Error under config key: {}'.format(k))
+                raise
+        else:
+            b[k] = v
+def cfg_from_file(filename):
+    """Load a config file and merge it into the default options."""
+    import yaml
+    with open(filename, 'r') as f:
+        yaml_cfg = edict(yaml.load(f))
+    _merge_a_into_b(yaml_cfg, __C)

code_new/miscc/config.pyc ADDED Viewed

Binary file (2.71 kB). View file

code_new/miscc/datasets.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import torch.utils.data as data
+# from PIL import Image
+import soundfile as sf
+import PIL
+import os
+import os.path
+import pickle
+import random
+import numpy as np
+import pandas as pd
+from scipy import signal
+from miscc.config import cfg
+class TextDataset(data.Dataset):
+    def __init__(self, data_dir, split='train',rirsize=4096): #, transform=None, target_transform=None):
+        # self.transform = transform
+        # self.target_transform = target_transform
+        self.rirsize = rirsize
+        self.data = []
+        self.data_dir = data_dir
+        self.bbox = None
+        split_dir = os.path.join(data_dir, split)
+        self.filenames = self.load_filenames(split_dir)
+        self.embeddings = self.load_embedding(split_dir)
+    def get_RIR(self, RIR_path):
+        wav,fs = sf.read(RIR_path) #Image.open(RIR_path).convert('RGB')
+        length = wav.size
+        # crop_length = int((16384*(80))/(64))
+        crop_length = 4096 #int(16384)
+        if(length<crop_length):
+            zeros = np.zeros(crop_length-length)
+            RIR_original = np.concatenate([wav,zeros])
+        else:
+            RIR_original = wav[0:crop_length]
+        # resample_length = int((self.rirsize*(80))/(64))
+        resample_length = int(self.rirsize)
+        if(resample_length==16384):
+            RIR = RIR_original
+        else:
+            RIR = RIR_original#signal.resample(RIR_original,resample_length)
+        RIR = np.array([RIR]).astype('float32')
+        # if bbox is not None:
+        #     R = int(np.maximum(bbox[2], bbox[3]) * 0.75)
+        #     center_x = int((2 * bbox[0] + bbox[2]) / 2)
+        #     center_y = int((2 * bbox[1] + bbox[3]) / 2)
+        #     y1 = np.maximum(0, center_y - R)
+        #     y2 = np.minimum(height, center_y + R)
+        #     x1 = np.maximum(0, center_x - R)
+        #     x2 = np.minimum(width, center_x + R)
+        #     RIR = RIR.crop([x1, y1, x2, y2])
+        # load_size = int(self.rirsize * 76 / 64)
+        # RIR = RIR.resize((load_size, load_size), PIL.Image.BILINEAR)
+        # if self.transform is not None:
+        #     RIR = self.transform(RIR)
+        return RIR
+    def load_embedding(self, data_dir):
+        embedding_filename   = '/embeddings.pickle'
+        with open(data_dir + embedding_filename, 'rb') as f:
+            embeddings = pickle.load(f)
+            # embeddings = np.array(embeddings)
+            # # embedding_shape = [embeddings.shape[-1]]
+            # print('embeddings: ', embeddings.shape)
+        return embeddings
+    # def load_class_id(self, data_dir, total_num):
+    #     if os.path.isfile(data_dir + '/class_info.pickle'):
+    #         with open(data_dir + '/class_info.pickle', 'rb') as f:
+    #             class_id = pickle.load(f)
+    #     else:
+    #         class_id = np.arange(total_num)
+    #     return class_id
+    def load_filenames(self, data_dir):
+        filepath = os.path.join(data_dir, 'filenames.pickle')
+        with open(filepath, 'rb') as f:
+            filenames = pickle.load(f)
+        print('Load filenames from: %s (%d)' % (filepath, len(filenames)))
+        return filenames
+    def __getitem__(self, index):
+        key = self.filenames[index]
+        data_dir = self.data_dir
+        # captions = self.captions[key]
+        embeddings = self.embeddings[key]
+        RIR_name = '%s/RIR/%s.wav' % (data_dir, key)
+        RIR = self.get_RIR(RIR_name)
+        embedding = np.array(embeddings).astype('float32')
+        # if self.target_transform is not None:
+        #     embedding = self.target_transform(embedding)
+        return RIR, embedding
+    def __len__(self):
+        return len(self.filenames)

code_new/miscc/datasets.pyc ADDED Viewed

Binary file (3.16 kB). View file

code_new/miscc/utils.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import os
+import errno
+import numpy as np
+from copy import deepcopy
+from miscc.config import cfg
+from scipy.io.wavfile import write
+from torch.nn import init
+import torch
+import torch.nn as nn
+import torchvision.utils as vutils
+from wavefile import WaveWriter, Format
+import RT60
+from multiprocessing import Pool
+#############################
+def KL_loss(mu, logvar):
+    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
+    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
+    KLD = torch.mean(KLD_element).mul_(-0.5)
+    return KLD
+def compute_discriminator_loss(netD, real_RIRs, fake_RIRs,
+                               real_labels, fake_labels,
+                               conditions, gpus):
+    """Compute the discriminator loss over real, mismatched and generated pairs.
+
+    Args:
+        netD: discriminator; it is called to extract features and must expose
+            ``get_cond_logits`` and ``get_uncond_logits`` (the latter may be None).
+        real_RIRs: batch of ground-truth RIRs.
+        fake_RIRs: batch of generated RIRs (detached here before use).
+        real_labels: BCE targets for "real". They are compared directly with the
+            logits, so presumably one entry per batch element -- confirm at caller.
+        fake_labels: BCE targets for "fake", same layout as ``real_labels``.
+        conditions: conditioning vectors (detached here before use).
+        gpus: device ids handed to ``nn.parallel.data_parallel``.
+
+    Returns:
+        ``(errD, errD_real, errD_wrong, errD_fake)``: the loss to back-propagate
+        followed by the ``.data`` of the three partial losses.
+    """
+    bce = nn.BCELoss()
+    n_samples = real_RIRs.size(0)
+    # Detach so the discriminator update does not reach the generator.
+    cond_fixed = conditions.detach()
+    fake_fixed = fake_RIRs.detach()
+    def _parallel(module, args):
+        return nn.parallel.data_parallel(module, args, gpus)
+    feat_real = _parallel(netD, (real_RIRs))
+    feat_fake = _parallel(netD, (fake_fixed))
+    # Real RIR with its own condition.
+    logits_real = _parallel(netD.get_cond_logits, (feat_real, cond_fixed))
+    errD_real = bce(logits_real, real_labels)
+    # Real RIR paired with the condition of the next sample: a "wrong" pair.
+    mismatched = (feat_real[:(n_samples - 1)], cond_fixed[1:])
+    logits_wrong = _parallel(netD.get_cond_logits, mismatched)
+    errD_wrong = bce(logits_wrong, fake_labels[1:])
+    # Generated RIR with its own condition.
+    logits_fake = _parallel(netD.get_cond_logits, (feat_fake, cond_fixed))
+    errD_fake = bce(logits_fake, fake_labels)
+    if netD.get_uncond_logits is None:
+        errD = errD_real + (errD_fake + errD_wrong) * 0.5
+    else:
+        uncond_logits_real = _parallel(netD.get_uncond_logits, (feat_real))
+        uncond_logits_fake = _parallel(netD.get_uncond_logits, (feat_fake))
+        uncond_errD_real = bce(uncond_logits_real, real_labels)
+        uncond_errD_fake = bce(uncond_logits_fake, fake_labels)
+        errD = ((errD_real + uncond_errD_real) / 2. +
+                (errD_fake + errD_wrong + uncond_errD_fake) / 3.)
+        errD_real = (errD_real + uncond_errD_real) / 2.
+        errD_fake = (errD_fake + uncond_errD_fake) / 2.
+    return errD, errD_real.data, errD_wrong.data, errD_fake.data
+def compute_generator_loss(epoch,netD,real_RIRs, fake_RIRs, real_labels, conditions, gpus):
+    """Compute the generator loss: adversarial BCE + weighted MSE + weighted T60 error.
+
+    Args:
+        epoch: current epoch; accepted for interface compatibility, not used.
+        netD: discriminator exposing ``get_cond_logits`` and
+            ``get_uncond_logits`` (the latter may be None).
+        real_RIRs: batch of ground-truth RIRs.
+        fake_RIRs: batch of generated RIRs (kept attached to the graph).
+        real_labels: BCE targets for "real".
+        conditions: conditioning vectors (detached here before use).
+        gpus: device ids handed to ``nn.parallel.data_parallel``.
+
+    Returns:
+        ``(errD_fake, MSE_error, RT_error)``. NOTE: despite its name,
+        ``MSE_error`` is the L1 distance between real and generated RIRs; the
+        true MSE is only used inside the loss. ``RT_error`` is the average of
+        the values returned by ``RT60.t60_parallel`` over 12 consecutive samples.
+
+    Raises:
+        ValueError: from ``np.random.randint`` when the batch holds 24 samples
+            or fewer, because the random window start is drawn from
+            ``[0, batch - 24)``.
+    """
+    criterion = nn.BCELoss()
+    l1_loss = nn.L1Loss()
+    mse_loss = nn.MSELoss()
+    cond = conditions.detach()
+    fake_features = nn.parallel.data_parallel(netD, (fake_RIRs), gpus)
+    # fake pairs
+    inputs = (fake_features, cond)
+    fake_logits = nn.parallel.data_parallel(netD.get_cond_logits, inputs, gpus)
+    MSE_error = l1_loss(real_RIRs,fake_RIRs)
+    MSE_error1 = mse_loss(real_RIRs,fake_RIRs)
+    sample_size = real_RIRs.size()[0]
+    channel = 12
+    fs = 16000
+    # Evaluate T60 on a random window of `channel` consecutive samples.
+    rn = np.random.randint(sample_size-(channel*2))
+    # The reshape assumes every RIR holds exactly 4096 samples.
+    real_wave = np.array(real_RIRs[rn:rn+channel].to("cpu").detach())
+    real_wave = real_wave.reshape(channel,4096)
+    fake_wave = np.array(fake_RIRs[rn:rn+channel].to("cpu").detach())
+    fake_wave = fake_wave.reshape(channel,4096)
+    # The T60 term is computed from detached NumPy copies in worker processes,
+    # so it shifts the loss value but cannot contribute any gradient.
+    pool = Pool(processes=channel)
+    try:
+        results = [pool.apply_async(RT60.t60_parallel, args=(n,real_wave,fake_wave,fs,))
+                   for n in range(channel)]
+        T60_error = 0
+        for result in results:
+            T60_error = T60_error + result.get()
+    finally:
+        # Always release the workers, even when a T60 job raised.
+        pool.close()
+        pool.join()
+    RT_error = T60_error/channel
+    errD_fake = criterion(fake_logits, real_labels) + 5* 4096 * MSE_error1 + 40 * RT_error
+    if netD.get_uncond_logits is not None:
+        fake_logits = \
+            nn.parallel.data_parallel(netD.get_uncond_logits,
+                                      (fake_features), gpus)
+        uncond_errD_fake = criterion(fake_logits, real_labels)
+        errD_fake += uncond_errD_fake
+    return errD_fake, MSE_error,RT_error
+#############################
+def weights_init(m):
+    """Initialise a single module in place; intended for ``net.apply(weights_init)``.
+
+    The module is dispatched on its class name:
+    * name contains ``Conv``      -> weight ~ N(0, 0.02); bias left untouched
+    * name contains ``BatchNorm`` -> weight ~ N(1, 0.02); bias = 0
+    * name contains ``Linear``    -> weight ~ N(0, 0.02); bias = 0 when present
+    Any other module is left unchanged.
+    """
+    kind = m.__class__.__name__
+    if 'Conv' in kind:
+        m.weight.data.normal_(0.0, 0.02)
+        return
+    if 'BatchNorm' in kind:
+        m.weight.data.normal_(1.0, 0.02)
+        m.bias.data.fill_(0)
+        return
+    if 'Linear' in kind:
+        m.weight.data.normal_(0.0, 0.02)
+        if m.bias is not None:
+            m.bias.data.fill_(0.0)
+#############################
+def _write_rir_wav(path, rir, fs):
+    """Write one RIR tensor to ``path`` as a single-channel file and close it."""
+    samples = np.array(rir.to("cpu").detach())
+    # The context manager closes the writer, so the data is flushed and the
+    # handle released immediately instead of whenever the object is collected.
+    with WaveWriter(path, channels=1, samplerate=fs) as writer:
+        writer.write(samples)
+def save_RIR_results(data_RIR, fake, epoch, RIR_dir):
+    """Dump the first ``cfg.VIS_COUNT`` RIRs of a batch as WAV files.
+
+    Args:
+        data_RIR: batch of real RIRs, or None when only generated RIRs exist.
+        fake: batch of generated RIRs.
+        epoch: epoch number, embedded in the generated files' names.
+        RIR_dir: existing directory receiving the files.
+
+    With real data, writes ``real_sample<i>.wav`` (rewritten on every call) and
+    ``fake_sample<i>_epoch_<epoch>.wav``; otherwise writes
+    ``small_fake_sample<i>_epoch_<epoch>.wav``. All files use 16 kHz.
+    """
+    num = cfg.VIS_COUNT
+    fs = 16000
+    fake = fake[0:num]
+    if data_RIR is not None:
+        data_RIR = data_RIR[0:num]
+        for i in range(num):
+            real_RIR_path = RIR_dir+"/real_sample"+str(i)+".wav"
+            fake_RIR_path = RIR_dir+"/fake_sample"+str(i)+"_epoch_"+str(epoch)+".wav"
+            _write_rir_wav(real_RIR_path, data_RIR[i], fs)
+            _write_rir_wav(fake_RIR_path, fake[i], fs)
+    else:
+        for i in range(num):
+            fake_RIR_path = RIR_dir+"/small_fake_sample"+str(i)+"_epoch_"+str(epoch)+".wav"
+            _write_rir_wav(fake_RIR_path, fake[i], fs)
+def save_model(netG, netD, epoch, model_dir):
+    """Save both networks' ``state_dict`` into ``model_dir``.
+
+    The generator gets one file per epoch (``netG_epoch_<epoch>.pth``); the
+    discriminator always overwrites ``netD_epoch_last.pth``.
+    """
+    generator_path = '%s/netG_epoch_%d.pth' % (model_dir, epoch)
+    discriminator_path = '%s/netD_epoch_last.pth' % (model_dir)
+    torch.save(netG.state_dict(), generator_path)
+    torch.save(netD.state_dict(), discriminator_path)
+def mkdir_p(path):
+    """Create ``path`` and any missing parents, like ``mkdir -p``.
+
+    An already existing directory is accepted silently; every other
+    ``OSError`` (including ``path`` existing as a regular file) propagates.
+    """
+    try:
+        os.makedirs(path)
+    except OSError as exc:
+        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
+        if not already_a_directory:
+            raise

code_new/miscc/utils.pyc ADDED Viewed

Binary file (5.71 kB). View file

code_new/model.py ADDED Viewed

	@@ -0,0 +1,413 @@

+import torch
+import torch.nn as nn
+import torch.nn.parallel
+from miscc.config import cfg
+from torch.autograd import Variable
+def conv3x1(in_planes, out_planes, stride=1):
+    """Return a bias-free ``nn.Conv1d`` with kernel length 41 and padding 20.
+
+    The "3x1" in the name is historical: the kernel really has 41 taps. With
+    ``stride=1`` the padding keeps the sequence length unchanged.
+    """
+    return nn.Conv1d(in_planes, out_planes, kernel_size=41,
+                     stride=stride, padding=20, bias=False)
+def old_conv3x1(in_planes, out_planes, stride=1):
+    """Return a bias-free ``nn.Conv1d`` with kernel length 3 and padding 1.
+
+    With ``stride=1`` the padding keeps the sequence length unchanged.
+    """
+    return nn.Conv1d(in_planes, out_planes, kernel_size=3,
+                     stride=stride, padding=1, bias=False)
+# def convn3x1(in_planes, out_planes, stride=1):
+#     "3x1 convolution with padding"
+#     return nn.Conv1d(in_planes, out_planes, kernel_size=9, stride=stride,
+#                      padding=4, bias=False)
+# Upscale the temporal length by a factor of 4 (upBlock4) or 2 (upBlock2)
+def upBlock4(in_planes, out_planes):
+    """Build a x4 upsampling block: ConvTranspose1d -> BatchNorm1d -> PReLU.
+
+    Kernel 41, stride 4, padding 19 and output_padding 1 give an output
+    length of exactly four times the input length.
+    """
+    deconv = nn.ConvTranspose1d(in_planes, out_planes, kernel_size=41,
+                                stride=4, padding=19, output_padding=1)
+    return nn.Sequential(deconv, nn.BatchNorm1d(out_planes), nn.PReLU())
+def upBlock2(in_planes, out_planes):
+    """Build a x2 upsampling block: ConvTranspose1d -> BatchNorm1d -> PReLU.
+
+    Kernel 41, stride 2, padding 20 and output_padding 1 give an output
+    length of exactly twice the input length.
+    """
+    deconv = nn.ConvTranspose1d(in_planes, out_planes, kernel_size=41,
+                                stride=2, padding=20, output_padding=1)
+    return nn.Sequential(deconv, nn.BatchNorm1d(out_planes), nn.PReLU())
+def sameBlock(in_planes, out_planes):
+    """Build a length-preserving block: conv3x1 -> BatchNorm1d -> PReLU."""
+    layers = [
+        conv3x1(in_planes, out_planes),
+        nn.BatchNorm1d(out_planes),
+        nn.PReLU(),
+    ]
+    return nn.Sequential(*layers)
+class ResBlock(nn.Module):
+    """Residual block: conv -> BN -> PReLU -> conv -> BN, plus identity skip, then PReLU.
+
+    Input and output both have ``channel_num`` channels; both convolutions come
+    from ``conv3x1`` with its default stride.
+    """
+    def __init__(self, channel_num):
+        super(ResBlock, self).__init__()
+        body = [
+            conv3x1(channel_num, channel_num),
+            nn.BatchNorm1d(channel_num),
+            nn.PReLU(),
+            conv3x1(channel_num, channel_num),
+            nn.BatchNorm1d(channel_num),
+        ]
+        self.block = nn.Sequential(*body)
+        self.relu = nn.PReLU()
+    def forward(self, x):
+        out = self.block(x)
+        # Skip connection, added in place onto the freshly computed branch.
+        out += x
+        return self.relu(out)
+# class CA_NET(nn.Module): #not chnaged yet
+#     # some code is modified from vae examples
+#     # (https://github.com/pytorch/examples/blob/master/vae/main.py)
+#     def __init__(self):
+#         super(CA_NET, self).__init__()
+#         self.t_dim = cfg.TEXT.DIMENSION
+#         self.c_dim = cfg.GAN.CONDITION_DIM
+#         self.fc = nn.Linear(self.t_dim, self.c_dim * 2, bias=True)
+#         self.relu = nn.ReLU()
+#     def encode(self, text_embedding):
+#         x = self.relu(self.fc(text_embedding))
+#         mu = x[:, :self.c_dim]
+#         logvar = x[:, self.c_dim:]
+#         return mu, logvar
+#     def reparametrize(self, mu, logvar):
+#         std = logvar.mul(0.5).exp_()
+#         if cfg.CUDA:
+#             eps = torch.cuda.FloatTensor(std.size()).normal_()
+#         else:
+#             eps = torch.FloatTensor(std.size()).normal_()
+#         eps = Variable(eps)
+#         return eps.mul(std).add_(mu)
+#     def forward(self, text_embedding):
+#         mu, logvar = self.encode(text_embedding)
+#         c_code = self.reparametrize(mu, logvar)
+#         return c_code, mu, logvar
+class COND_NET(nn.Module):
+    """Condition encoder: a single linear layer followed by PReLU.
+
+    Maps an embedding of size ``cfg.TEXT.DIMENSION`` to a condition code of
+    size ``cfg.GAN.CONDITION_DIM``. Unlike the VAE-style network it was derived
+    from, there is no mean/variance split and no re-parametrisation, so the
+    mapping is deterministic.
+    """
+    def __init__(self):
+        super(COND_NET, self).__init__()
+        self.t_dim = cfg.TEXT.DIMENSION
+        self.c_dim = cfg.GAN.CONDITION_DIM
+        self.fc = nn.Linear(self.t_dim, self.c_dim, bias=True)
+        # Attribute keeps its historical name although it is a PReLU.
+        self.relu = nn.PReLU()
+    def encode(self, text_embedding):
+        """Project the embedding and apply the activation."""
+        projected = self.fc(text_embedding)
+        return self.relu(projected)
+    def forward(self, text_embedding):
+        """Return the condition code for ``text_embedding``."""
+        return self.encode(text_embedding)
+class D_GET_LOGITS(nn.Module):
+    """Discriminator head turning encoder features into sigmoid scores.
+
+    Args:
+        ndf: base discriminator width; the head expects ``ndf * 8`` input channels.
+        nef: size of the condition code.
+        bcondition: when True, the condition code is tiled along the time axis
+            and concatenated to the features before scoring.
+    """
+    def __init__(self, ndf, nef, bcondition=True):
+        super(D_GET_LOGITS, self).__init__()
+        self.df_dim = ndf
+        self.ef_dim = nef
+        self.bcondition = bcondition
+        kernel_length =41
+        # Used by both variants: reduces ndf*8 channels to ndf//2 while keeping
+        # the sequence length (stride 1, kernel 41, padding 20).
+        self.convd1d =  nn.ConvTranspose1d(ndf*8,ndf //2,kernel_size=kernel_length,stride=1, padding=20)
+        if bcondition:
+            self.outlogits = nn.Sequential(
+                old_conv3x1(ndf //2 + nef, ndf //2 ),
+                nn.BatchNorm1d(ndf //2 ),
+                nn.LeakyReLU(0.2, inplace=True),
+                nn.Conv1d(ndf //2 , 1, kernel_size=16, stride=4),
+                nn.Sigmoid()
+                )
+        else:
+            self.outlogits = nn.Sequential(
+                nn.Conv1d(ndf // 2 , 1, kernel_size=16, stride=4),
+                nn.Sigmoid())
+    def forward(self, h_code, c_code=None):
+        """Score ``h_code``, optionally together with the condition ``c_code``.
+
+        Returns a flat 1-D tensor; with a length-16 feature map this is one
+        score per sample. If the head was built with ``bcondition=True`` a
+        ``c_code`` must be supplied, since the scoring layers expect the
+        concatenated channel count.
+        """
+        h_code = self.convd1d(h_code)
+        if self.bcondition and c_code is not None:
+            c_code = c_code.view(-1, self.ef_dim, 1)
+            # Tile the condition to the actual feature length instead of a
+            # hard-coded 16 so the concatenation works for any input length.
+            c_code = c_code.repeat(1, 1, h_code.size(2))
+            h_c_code = torch.cat((h_code, c_code), 1)
+        else:
+            h_c_code = h_code
+        output = self.outlogits(h_c_code)
+        return output.view(-1)
+# ############# Networks for stageI GAN #############
+class STAGE1_G(nn.Module):
+    """Stage-I generator: maps a conditioning embedding to a one-channel RIR.
+
+    No noise vector is used. The embedding is encoded by ``COND_NET``, expanded
+    by a linear layer to ``gf_dim`` channels of length 16 and upsampled
+    16 -> 64 -> 256 -> 1024 -> 2048 -> 4096 before a tanh output layer.
+    """
+    def __init__(self):
+        super(STAGE1_G, self).__init__()
+        self.gf_dim = cfg.GAN.GF_DIM * 8
+        self.ef_dim = cfg.GAN.CONDITION_DIM
+        self.define_module()
+    def define_module(self):
+        """Create the sub-modules; attribute names define the checkpoint keys."""
+        ngf = self.gf_dim
+        # TEXT.DIMENSION -> GAN.CONDITION_DIM
+        self.cond_net = COND_NET()
+        # condition code -> ngf x 16 (flattened; reshaped in forward)
+        self.fc = nn.Sequential(
+            nn.Linear(self.ef_dim, ngf * 16, bias=False),
+            nn.BatchNorm1d(ngf * 16),
+            nn.PReLU())
+        # ngf x 16 -> ngf/2 x 64
+        self.upsample1 = upBlock4(ngf, ngf // 2)
+        # -> ngf/4 x 256
+        self.upsample2 = upBlock4(ngf // 2, ngf // 4)
+        # -> ngf/8 x 1024
+        self.upsample3 = upBlock4(ngf // 4, ngf // 8)
+        # -> ngf/16 x 2048
+        self.upsample4 = upBlock2(ngf // 8, ngf // 16)
+        # -> ngf/16 x 4096
+        self.upsample5 = upBlock2(ngf // 16, ngf // 16)
+        # -> 1 x 4096
+        self.RIR = nn.Sequential(
+            nn.ConvTranspose1d(ngf // 16, 1, kernel_size=41, stride=1, padding=20),
+            nn.Tanh())
+    def forward(self, text_embedding):
+        """Return ``(None, fake_RIR, text_embedding)``.
+
+        The third element is the raw input embedding, not the encoded
+        condition code.
+        """
+        c_code = self.cond_net(text_embedding)
+        h_code = self.fc(c_code).view(-1, self.gf_dim, 16)
+        stages = (self.upsample1, self.upsample2, self.upsample3,
+                  self.upsample4, self.upsample5)
+        for stage in stages:
+            h_code = stage(h_code)
+        fake_RIR = self.RIR(h_code)
+        return None, fake_RIR, text_embedding
+class STAGE1_D(nn.Module):
+    """Stage-I discriminator: encodes a one-channel RIR into a feature map.
+
+    ``forward`` returns only the features; scores come from the separate
+    ``get_cond_logits`` head. There is no unconditional head
+    (``get_uncond_logits`` is None).
+    """
+    def __init__(self):
+        super(STAGE1_D, self).__init__()
+        self.df_dim = cfg.GAN.DF_DIM
+        self.ef_dim = cfg.GAN.CONDITION_DIM
+        self.define_module()
+    def define_module(self):
+        """Create the encoder (four stride-4 convolutions) and the logit head."""
+        ndf, nef = self.df_dim, self.ef_dim
+        kernel_length = 41
+        # Every convolution divides the length by 4: 4096 -> 1024 -> 256 -> 64 -> 16.
+        layers = [
+            nn.Conv1d(1, ndf, kernel_length, stride=4, padding=20, bias=False),
+            nn.LeakyReLU(0.2, inplace=True),
+        ]
+        for mult in (1, 2, 4):
+            layers.append(nn.Conv1d(ndf * mult, ndf * mult * 2, kernel_length,
+                                    stride=4, padding=20, bias=False))
+            layers.append(nn.BatchNorm1d(ndf * mult * 2))
+            layers.append(nn.LeakyReLU(0.2, inplace=True))
+        # Final state size: (ndf * 8) x 16 for a 4096-sample input.
+        self.encode_RIR = nn.Sequential(*layers)
+        self.get_cond_logits = D_GET_LOGITS(ndf, nef)
+        self.get_uncond_logits = None
+    def forward(self, RIRs):
+        """Return the encoder features for ``RIRs``."""
+        return self.encode_RIR(RIRs)
+# ############# Networks for stageII GAN #############
+class STAGE2_G(nn.Module):
+    """Stage-II generator: refines a stage-I RIR into a longer RIR.
+
+    The wrapped stage-I generator has its parameters frozen and its output
+    detached. The stage-I RIR is encoded, joined with the tiled condition code,
+    passed through ``cfg.GAN.R_NUM`` residual blocks and upsampled.
+    NOTE(review): only ``requires_grad`` is switched off; the stage-I module is
+    not put in eval mode here -- confirm whether its BatchNorm statistics are
+    meant to keep updating.
+    """
+    def __init__(self, STAGE1_G):
+        super(STAGE2_G, self).__init__()
+        self.gf_dim = cfg.GAN.GF_DIM
+        self.ef_dim = cfg.GAN.CONDITION_DIM
+        self.STAGE1_G = STAGE1_G
+        # fix parameters of stageI GAN
+        for frozen in self.STAGE1_G.parameters():
+            frozen.requires_grad = False
+        self.define_module()
+    def _make_layer(self, block, channel_num):
+        """Stack ``cfg.GAN.R_NUM`` instances of ``block(channel_num)``."""
+        return nn.Sequential(*[block(channel_num) for _ in range(cfg.GAN.R_NUM)])
+    def define_module(self):
+        """Create the sub-modules; attribute names define the checkpoint keys."""
+        ngf = self.gf_dim
+        # TEXT.DIMENSION -> GAN.CONDITION_DIM
+        self.cond_net = COND_NET()
+        # 1 x 4096 -> ngf x 4096 -> 2ngf x 1024 -> 4ngf x 256
+        self.encoder = nn.Sequential(
+            conv3x1(1, ngf),
+            nn.ReLU(True),
+            nn.Conv1d(ngf, ngf * 2, kernel_size=16, stride=4, padding=6, bias=False),
+            nn.BatchNorm1d(ngf * 2),
+            nn.ReLU(True),
+            nn.Conv1d(ngf * 2, ngf * 4, kernel_size=16, stride=4, padding=6, bias=False),
+            nn.BatchNorm1d(ngf * 4),
+            nn.ReLU(True))
+        # Fuse the encoded RIR with the tiled condition code.
+        self.hr_joint = nn.Sequential(
+            conv3x1(self.ef_dim + ngf * 4, ngf * 4),
+            nn.BatchNorm1d(ngf * 4),
+            nn.ReLU(True))
+        self.residual = self._make_layer(ResBlock, ngf * 4)
+        # --> 2ngf x 1024
+        self.upsample1 = upBlock4(ngf * 4, ngf * 2)
+        # --> ngf x 4096
+        self.upsample2 = upBlock4(ngf * 2, ngf)
+        # --> ngf // 2 x 16384
+        self.upsample3 = upBlock4(ngf, ngf // 2)
+        # --> ngf // 4 x 16384
+        self.upsample4 = sameBlock(ngf // 2, ngf // 4)
+        # --> 1 x 16384
+        self.RIR = nn.Sequential(
+            conv3x1(ngf // 4, 1),
+            nn.Tanh())
+    def forward(self, text_embedding):
+        """Return ``(stage1_RIR, fake_RIR, c_code)``; ``c_code`` is the untiled condition code."""
+        _, stage1_RIR, _ = self.STAGE1_G(text_embedding)
+        stage1_RIR = stage1_RIR.detach()
+        encoded_RIR = self.encoder(stage1_RIR)
+        c_code1 = self.cond_net(text_embedding)
+        # Tile the condition over 256 steps, the encoder's output length for a 4096-sample RIR.
+        tiled = c_code1.view(-1, self.ef_dim, 1).repeat(1, 1, 256)
+        h_code = self.hr_joint(torch.cat([encoded_RIR, tiled], 1))
+        h_code = self.residual(h_code)
+        for stage in (self.upsample1, self.upsample2, self.upsample3, self.upsample4):
+            h_code = stage(h_code)
+        fake_RIR = self.RIR(h_code)
+        return stage1_RIR, fake_RIR, c_code1
+class STAGE2_D(nn.Module):
+    """Stage-II discriminator: encodes a one-channel RIR into a feature map.
+
+    ``forward`` returns only the features; the conditional and unconditional
+    scores come from ``get_cond_logits`` and ``get_uncond_logits``.
+    """
+    def __init__(self):
+        super(STAGE2_D, self).__init__()
+        self.df_dim = cfg.GAN.DF_DIM
+        self.ef_dim = cfg.GAN.CONDITION_DIM
+        self.define_module()
+    def define_module(self):
+        """Create the encoder and both logit heads."""
+        ndf, nef = self.df_dim, self.ef_dim
+        # Length-preserving stem: 1 x 16384 -> ndf x 16384.
+        layers = [
+            nn.Conv1d(1, ndf, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.LeakyReLU(0.2, inplace=True),
+        ]
+        # Five stride-4 stages: 16384 -> 4096 -> 1024 -> 256 -> 64 -> 16,
+        # doubling the channels each time up to ndf * 32.
+        for mult in (1, 2, 4, 8, 16):
+            layers.append(nn.Conv1d(ndf * mult, ndf * mult * 2, kernel_size=16,
+                                    stride=4, padding=6, bias=False))
+            layers.append(nn.BatchNorm1d(ndf * mult * 2))
+            layers.append(nn.LeakyReLU(0.2, inplace=True))
+        # Two length-preserving stages bring the channels back down to ndf * 8.
+        for mult in (32, 16):
+            layers.append(conv3x1(ndf * mult, ndf * mult // 2))
+            layers.append(nn.BatchNorm1d(ndf * mult // 2))
+            layers.append(nn.LeakyReLU(0.2, inplace=True))
+        self.encode_RIR = nn.Sequential(*layers)
+        self.get_cond_logits = D_GET_LOGITS(ndf, nef, bcondition=True)
+        self.get_uncond_logits = D_GET_LOGITS(ndf, nef, bcondition=False)
+    def forward(self, RIRs):
+        """Return the encoder features for ``RIRs``."""
+        return self.encode_RIR(RIRs)

code_new/single_copy.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import os, fnmatch
+import numpy as np
+import random
+import soundfile as sf
+from scipy.io.wavfile import write
+# import librosa
+import RT60
+# Collect every .wav RIR below folder_path, split it into single channels, trim
+# each channel to its first extreme sample and keep those whose estimated T60
+# is below one second. The kept T60 values go to RT60.txt, one per line.
+folder_path = "/cephfs/anton/room-impulse-responses/AIR/RWCP_REVERB_AACHEN/real_rirs_isotropic_noises/"
+final_path = "/cephfs/anton/room-impulse-responses/AIR/RWCP_REVERB_AACHEN/AACHEN/"
+# NOTE: resampling is disabled, so the file's own sample rate is ignored and
+# every signal is analysed and written as if it were sampled at tfs.
+tfs =16000
+def _trim_to_first_extreme(signal):
+    """Drop the samples preceding the first global maximum or minimum, whichever comes first."""
+    max_loc = np.where(signal == np.amax(signal))
+    min_loc = np.where(signal == np.amin(signal))
+    start = min(max_loc[0][0],min_loc[0][0])
+    return signal[start:len(signal)]
+def _save_if_short(file_label, signal, save_path):
+    """Log the T60 and write ``signal`` to ``save_path`` when its T60 is below one second."""
+    T60_val = RT60.t60_impulse(signal,tfs)
+    if(T60_val<1):
+        file_label.write(str(T60_val)+"\n")
+        write(save_path,tfs,signal.astype(np.float32))
+# The context manager guarantees that RT60.txt is flushed and closed.
+with open("RT60.txt","w") as file_label:
+    for root, dirnames, filenames in os.walk(folder_path):
+        for filename in filenames:
+            if filename.endswith(".wav"):
+                ACE_Path = os.path.join(root, filename)
+                wave,fs = sf.read(ACE_Path)
+                channel = int(wave.size/len(wave))
+                if(channel == 1):
+                    wave_single = _trim_to_first_extreme(wave)
+                    _save_if_short(file_label, wave_single, final_path+ filename)
+                else:
+                    for n in range(channel):
+                        wave_single = _trim_to_first_extreme(wave[:,n])
+                        # Checked per channel: previously this test sat outside
+                        # the loop, so only the last channel was ever saved.
+                        _save_if_short(file_label, wave_single,
+                                       final_path+filename+str(n)+".wav")

code_new/trainer.py ADDED Viewed

	@@ -0,0 +1,392 @@

+from __future__ import print_function
+from six.moves import range
+from PIL import Image
+import torch.backends.cudnn as cudnn
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import torch.optim as optim
+import os
+import time
+import numpy as np
+import torchfile
+import pickle
+import soundfile as sf
+import re
+import math
+from wavefile import WaveWriter, Format
+from miscc.config import cfg
+from miscc.utils import mkdir_p
+from miscc.utils import weights_init
+from miscc.utils import save_RIR_results, save_model
+from miscc.utils import KL_loss
+from miscc.utils import compute_discriminator_loss, compute_generator_loss
+# from torch.utils.tensorboard import summary
+# from torch.utils.tensorboard import FileWriter
+class GANTrainer(object):
+    def __init__(self, output_dir):
+        """Prepare output folders (training only) and read the run settings from ``cfg``.
+
+        When ``cfg.TRAIN.FLAG`` is set, the ``Model``, ``Model_RT``, ``RIR`` and
+        ``Log`` folders are created below ``output_dir``. The effective batch
+        size is ``cfg.TRAIN.BATCH_SIZE`` times the number of ids in
+        ``cfg.GPU_ID``, and the first listed GPU becomes the current CUDA device.
+        """
+        if cfg.TRAIN.FLAG:
+            self.model_dir = os.path.join(output_dir, 'Model')
+            self.model_dir_RT = os.path.join(output_dir, 'Model_RT')
+            self.RIR_dir = os.path.join(output_dir, 'RIR')
+            self.log_dir = os.path.join(output_dir, 'Log')
+            for directory in (self.model_dir, self.model_dir_RT,
+                              self.RIR_dir, self.log_dir):
+                mkdir_p(directory)
+        self.max_epoch = cfg.TRAIN.MAX_EPOCH
+        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL
+        # cfg.GPU_ID is a comma-separated list of device ids.
+        self.gpus = [int(ix) for ix in cfg.GPU_ID.split(',')]
+        self.num_gpus = len(self.gpus)
+        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
+        torch.cuda.set_device(self.gpus[0])
+        cudnn.benchmark = True
+    # ############# For training stageI GAN #############
+    def load_network_stageI(self):
+        """Build the stage-I generator and discriminator and return ``(netG, netD)``.
+
+        Both networks are initialised with ``weights_init``. Where ``cfg.NET_G``
+        or ``cfg.NET_D`` is non-empty, the corresponding checkpoint is loaded on
+        top; the networks are moved to the GPU when ``cfg.CUDA`` is set.
+        """
+        from model import STAGE1_G, STAGE1_D
+        netG = STAGE1_G()
+        netG.apply(weights_init)
+        print(netG)
+        netD = STAGE1_D()
+        netD.apply(weights_init)
+        print(netD)
+        for net, checkpoint in ((netG, cfg.NET_G), (netD, cfg.NET_D)):
+            if checkpoint != '':
+                # map_location keeps the loaded tensors on the CPU.
+                state_dict = torch.load(
+                    checkpoint, map_location=lambda storage, loc: storage)
+                net.load_state_dict(state_dict)
+                print('Load from: ', checkpoint)
+        if cfg.CUDA:
+            netG.cuda()
+            netD.cuda()
+        return netG, netD
+    # ############# For training stageII GAN  #############
+    def load_network_stageII(self):
+        """Build the stage-II generator and discriminator and return ``(netG, netD)``.
+
+        The generator wraps a fresh stage-I generator. Its weights come from
+        ``cfg.NET_G`` (a full stage-II checkpoint) or, failing that, from
+        ``cfg.STAGE1_G`` (stage-I weights only). The discriminator optionally
+        loads ``cfg.NET_D``. Both are moved to the GPU when ``cfg.CUDA`` is set.
+
+        Raises:
+            ValueError: if neither ``cfg.NET_G`` nor ``cfg.STAGE1_G`` is set.
+                Previously the method printed a message and returned None,
+                which the caller then failed to unpack.
+        """
+        from model import STAGE1_G, STAGE2_G, STAGE2_D
+        Stage1_G = STAGE1_G()
+        netG = STAGE2_G(Stage1_G)
+        netG.apply(weights_init)
+        print(netG)
+        if cfg.NET_G != '':
+            state_dict = \
+                torch.load(cfg.NET_G,
+                           map_location=lambda storage, loc: storage)
+            netG.load_state_dict(state_dict)
+            print('Load from: ', cfg.NET_G)
+        elif cfg.STAGE1_G != '':
+            state_dict = \
+                torch.load(cfg.STAGE1_G,
+                           map_location=lambda storage, loc: storage)
+            netG.STAGE1_G.load_state_dict(state_dict)
+            print('Load from: ', cfg.STAGE1_G)
+        else:
+            # Stage II cannot train on top of an untrained stage I.
+            raise ValueError("Please give the Stage1_G path")
+        netD = STAGE2_D()
+        netD.apply(weights_init)
+        if cfg.NET_D != '':
+            state_dict = \
+                torch.load(cfg.NET_D,
+                           map_location=lambda storage, loc: storage)
+            netD.load_state_dict(state_dict)
+            print('Load from: ', cfg.NET_D)
+        print(netD)
+        if cfg.CUDA:
+            netG.cuda()
+            netD.cuda()
+        return netG, netD
+    def train(self, data_loader, stage=1):
+        if stage == 1:
+            netG, netD = self.load_network_stageI()
+        else:
+            netG, netD = self.load_network_stageII()
+        # nz = cfg.Z_DIM
+        batch_size = self.batch_size
+        # noise = Variable(torch.FloatTensor(batch_size, nz))
+        # fixed_noise = \
+        #     Variable(torch.FloatTensor(batch_size, nz).normal_(0, 1),
+        #              volatile=True)
+        real_labels = Variable(torch.FloatTensor(batch_size).fill_(1))
+        fake_labels = Variable(torch.FloatTensor(batch_size).fill_(0))
+        if cfg.CUDA:
+            # noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
+            real_labels, fake_labels = real_labels.cuda(), fake_labels.cuda()
+        generator_lr = cfg.TRAIN.GENERATOR_LR
+        discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
+        lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
+        # optimizerD = \
+        #     optim.Adam(netD.parameters(),
+        #                lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
+        optimizerD = \
+            optim.RMSprop(netD.parameters(),
+                       lr=cfg.TRAIN.DISCRIMINATOR_LR)
+        netG_para = []
+        for p in netG.parameters():
+            if p.requires_grad:
+                netG_para.append(p)
+        # optimizerG = optim.Adam(netG_para,
+        #                         lr=cfg.TRAIN.GENERATOR_LR,
+        #                         betas=(0.5, 0.999))
+        optimizerG = optim.RMSprop(netG_para,
+                                lr=cfg.TRAIN.GENERATOR_LR)
+        count = 0
+        least_RT=10
+        for epoch in range(self.max_epoch):
+            start_t = time.time()
+            if epoch % lr_decay_step == 0 and epoch > 0:
+                generator_lr *= 0.7#0.5
+                for param_group in optimizerG.param_groups:
+                    param_group['lr'] = generator_lr
+                discriminator_lr *= 0.7#0.5
+                for param_group in optimizerD.param_groups:
+                    param_group['lr'] = discriminator_lr
+            for i, data in enumerate(data_loader, 0):
+                ######################################################
+                # (1) Prepare training data
+                ######################################################
+                real_RIR_cpu, txt_embedding = data
+                real_RIRs = Variable(real_RIR_cpu)
+                txt_embedding = Variable(txt_embedding)
+                if cfg.CUDA:
+                    real_RIRs = real_RIRs.cuda()
+                    txt_embedding = txt_embedding.cuda()
+                #print("trianer RIRs ",real_RIRs.size())
+                #print("trianer embedding ",txt_embedding.size())
+                #######################################################
+                # (2) Generate fake images
+                ######################################################
+                # noise.data.normal_(0, 1)
+                # inputs = (txt_embedding, noise)
+                inputs = (txt_embedding)
+                # _, fake_RIRs, mu, logvar = \
+                #     nn.parallel.data_parallel(netG, inputs, self.gpus)
+                _, fake_RIRs,c_code = nn.parallel.data_parallel(netG, inputs, self.gpus)
+                ############################
+                # (3) Update D network
+                ###########################
+                netD.zero_grad()
+                errD, errD_real, errD_wrong, errD_fake = \
+                    compute_discriminator_loss(netD, real_RIRs, fake_RIRs,
+                                               real_labels, fake_labels,
+                                               c_code, self.gpus)
+                errD_total = errD*5
+                errD_total.backward()
+                optimizerD.step()
+                ############################
+                # (2) Update G network
+                ###########################
+                # kl_loss = KL_loss(mu, logvar)
+                netG.zero_grad()
+                errG,MSE_error,RT_error= compute_generator_loss(epoch,netD,real_RIRs, fake_RIRs,
+                                              real_labels, c_code, self.gpus)
+                errG_total = errG *5#+ kl_loss * cfg.TRAIN.COEFF.KL
+                errG_total.backward()
+                optimizerG.step()
+                for p in range(2):
+                    inputs = (txt_embedding)
+                    # _, fake_RIRs, mu, logvar = \
+                    #     nn.parallel.data_parallel(netG, inputs, self.gpus)
+                    _, fake_RIRs,c_code = nn.parallel.data_parallel(netG, inputs, self.gpus)
+                    netG.zero_grad()
+                    errG,MSE_error,RT_error  = compute_generator_loss(epoch,netD,real_RIRs, fake_RIRs,
+                                              real_labels, c_code, self.gpus)
+                    # kl_loss = KL_loss(mu, logvar)
+                    errG_total = errG *5#+ kl_loss * cfg.TRAIN.COEFF.KL
+                    errG_total.backward()
+                    optimizerG.step()
+                count = count + 1
+                if i % 100 == 0:
+                    # summary_D = summary.scalar('D_loss', errD.data[0])
+                    # summary_D_r = summary.scalar('D_loss_real', errD_real)
+                    # summary_D_w = summary.scalar('D_loss_wrong', errD_wrong)
+                    # summary_D_f = summary.scalar('D_loss_fake', errD_fake)
+                    # summary_G = summary.scalar('G_loss', errG.data[0])
+                    # summary_KL = summary.scalar('KL_loss', kl_loss.data[0])
+                    # summary_D = summary.scalar('D_loss', errD.data)
+                    # summary_D_r = summary.scalar('D_loss_real', errD_real)
+                    # summary_D_w = summary.scalar('D_loss_wrong', errD_wrong)
+                    # summary_D_f = summary.scalar('D_loss_fake', errD_fake)
+                    # summary_G = summary.scalar('G_loss', errG.data)
+                    # summary_KL = summary.scalar('KL_loss', kl_loss.data)
+                    # self.summary_writer.add_summary(summary_D, count)
+                    # self.summary_writer.add_summary(summary_D_r, count)
+                    # self.summary_writer.add_summary(summary_D_w, count)
+                    # self.summary_writer.add_summary(summary_D_f, count)
+                    # self.summary_writer.add_summary(summary_G, count)
+                    # self.summary_writer.add_summary(summary_KL, count)
+                    # save the image result for each epoch
+                    inputs = (txt_embedding)
+                    lr_fake, fake, _ = \
+                        nn.parallel.data_parallel(netG, inputs, self.gpus)
+                    if(epoch%self.snapshot_interval==0):
+                        save_RIR_results(real_RIR_cpu, fake, epoch, self.RIR_dir)
+                        if lr_fake is not None:
+                            save_RIR_results(None, lr_fake, epoch, self.RIR_dir)
+            end_t = time.time()
+            # print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_KL: %.4f
+            #          Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f
+            #          Total Time: %.2fsec
+            #       '''
+            #       % (epoch, self.max_epoch, i, len(data_loader),
+            #          errD.data[0], errG.data[0], kl_loss.data[0],
+            #          errD_real, errD_wrong, errD_fake, (end_t - start_t)))
+            # print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_KL: %.4f
+            #          Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f
+            #          Total Time: %.2fsec
+            #       '''
+            #       % (epoch, self.max_epoch, i, len(data_loader),
+            #          errD.data, errG.data, kl_loss.data,
+            #          errD_real, errD_wrong, errD_fake, (end_t - start_t)))
+            print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f
+                     Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f   MSE_ERROR  %.4f RT_error %.4f
+                     Total Time: %.2fsec
+                  '''
+                  % (epoch, self.max_epoch, i, len(data_loader),
+                     errD.data, errG.data,
+                     errD_real, errD_wrong, errD_fake,MSE_error*4096, RT_error,(end_t - start_t)))
+            store_to_file ="[{}/{}][{}/{}] Loss_D: {:.4f} Loss_G: {:.4f} Loss_real: {:.4f} Loss_wrong:{:.4f} Loss_fake {:.4f}  MSE Error:{:.4f} RT_error{:.4f} Total Time: {:.2f}sec".format(epoch, self.max_epoch, i, len(data_loader),
+                     errD.data, errG.data, errD_real, errD_wrong, errD_fake,MSE_error*4096,RT_error, (end_t - start_t))
+            store_to_file =store_to_file+"\n"
+            with open("errors.txt", "a") as myfile:
+                myfile.write(store_to_file)
+            if (RT_error<least_RT):
+                least_RT = RT_error
+                save_model(netG, netD, epoch, self.model_dir_RT)
+            if epoch % self.snapshot_interval == 0:
+                save_model(netG, netD, epoch, self.model_dir)
+        #
+        save_model(netG, netD, self.max_epoch, self.model_dir)
+        #
+        # self.summary_writer.close()
+    def sample(self,file_path,stage=1):
+        if stage == 1:
+            netG, _ = self.load_network_stageI()
+        else:
+            netG, _ = self.load_network_stageII()
+        netG.eval()
+        time_list =[]
+        embedding_path = file_path
+        with open(embedding_path, 'rb') as f:
+            embeddings_pickle = pickle.load(f)
+        embeddings_list =[]
+        num_embeddings = len(embeddings_pickle)
+        for b in range (num_embeddings):
+            embeddings_list.append(embeddings_pickle[b])
+        embeddings = np.array(embeddings_list)
+        save_dir_GAN = "Generated_RIRs"
+        mkdir_p(save_dir_GAN)
+        normalize_embedding = []
+        batch_size = np.minimum(num_embeddings, self.batch_size)
+        count = 0
+        count_this = 0
+        while count < num_embeddings:
+            iend = count + batch_size
+            if iend > num_embeddings:
+                iend = num_embeddings
+                count = num_embeddings - batch_size
+            embeddings_batch = embeddings[count:iend]
+            txt_embedding = Variable(torch.FloatTensor(embeddings_batch))
+            if cfg.CUDA:
+                txt_embedding = txt_embedding.cuda()
+            #######################################################
+             # (2) Generate fake images
+            ######################################################
+            start_t = time.time()
+            inputs = (txt_embedding)
+            _, fake_RIRs,c_code = \
+                nn.parallel.data_parallel(netG, inputs, self.gpus)
+            end_t = time.time()
+            diff_t = end_t - start_t
+            time_list.append(diff_t)
+            RIR_batch_size = batch_size #int(batch_size/2)
+            print("batch_size ", RIR_batch_size)
+            channel_size = 64
+            for i in range(channel_size):
+                fs =16000
+                wave_name = "RIR-"+str(count+i)+".wav"
+                save_name_GAN = '%s/%s' % (save_dir_GAN,wave_name)
+                print("wave : ",save_name_GAN)
+                res = {}
+                res_buffer = []
+                rate = 16000
+                res['rate'] = rate
+                wave_GAN = fake_RIRs[i].data.cpu().numpy()
+                wave_GAN = np.array(wave_GAN[0])
+                res_buffer.append(wave_GAN)
+                res['samples'] = np.zeros((len(res_buffer), np.max([len(ps) for ps in res_buffer])))
+                for i, c in enumerate(res_buffer):
+                    res['samples'][i, :len(c)] = c
+                w = WaveWriter(save_name_GAN, channels=np.shape(res['samples'])[0], samplerate=int(res['rate']))
+                w.write(np.array(res['samples']))
+            print("counter = ",count)
+            count = count+64
+            count_this = count_this+1

download_data.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+gdown https://drive.google.com/uc?id=17NF1MVtXaWe9zhqWJqmG5tFUZb_9X0M5
+unzip data.zip
+mkdir output

download_generate.sh ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Download the archive from Google Drive and unpack generate.zip; the URL is quoted so `?` is never treated as a glob character.
2	+ gdown "https://drive.google.com/uc?id=1XOyzsZD3s_pkZBlWcH3KtCR9YpjRVbHG"
3	+ unzip generate.zip

example1.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import os
+import numpy as np
+import random
+import argparse
+import pickle
+normalize_geometry_embeddings_list =[]
+for n in range(960):
+	lx = (8/960)*n + 0.5
+	geometry_embeddings= [lx,3.5,1.5,8.8,3.5,1.5,9,7,3,0.35]
+	max_dimension = 5
+	normalize_geometry_embeddings =np.divide(geometry_embeddings,max_dimension)-1
+	normalize_geometry_embeddings_list.append(normalize_geometry_embeddings)
+embeddings_pickle ="example1.pickle"
+with open(embeddings_pickle, 'wb') as f:
+    pickle.dump(normalize_geometry_embeddings_list, f, protocol=2)

slides.pptx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f928a9b1f7bc05d972e51988104bb547e9cce25bb03f7841023807050af65875
+size 4718146