diff --git a/.gitignore b/.gitignore index 4aa92d4..a26c220 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ -.DS_Store -bard -logmind -logmind-go -chatbridge -chroma/js +node_modules/ +.next/ +__pycache__/ +.env* diff --git a/midi_meta/.gitignore b/midi_meta/.gitignore new file mode 100644 index 0000000..cb7b2f0 --- /dev/null +++ b/midi_meta/.gitignore @@ -0,0 +1,2 @@ +.env +*.db \ No newline at end of file diff --git a/midi_meta/Makefile b/midi_meta/Makefile new file mode 100644 index 0000000..67bfe2e --- /dev/null +++ b/midi_meta/Makefile @@ -0,0 +1,26 @@ + +-include .env +export + +.PHONY: help +help: ## Prints out the description of the Make targets available + @grep -E '^[a-zA-Z0-9_-]+:.*?## ' Makefile | awk 'BEGIN {FS = ":.*?## "}; {printf "%-20s %s\n", $$1, $$2}' + +.PHONY: run-openai +run-openai: REQUIRED_ENV := OPENAI_API_KEY MODEL_NAME INPUT_FILE OUTPUT_FILE +run-openai: check-env ## Runs the metadata gather process on the specified input file + python main.py --model-type openai --model-name "${MODEL_NAME}" "${INPUT_FILE}" "${OUTPUT_FILE}" + +write-updates: REQUIRED_ENV := OUTPUT_DIR METADATA_FILE MIDI_DIR +write-updates: check-env ## Saves the new metadata into an updated set of MIDI file(s) + python meta_writer.py --output-dir "${OUTPUT_DIR}" "${METADATA_FILE}" "${MIDI_DIR}" + + +.PHONY: check-env +check-env: + @for var in $${REQUIRED_ENV}; do \ + if [ -z "$${!var}" ]; then \ + echo "Missing required environment variable: $$var"; \ + exit 1; \ + fi; \ + done \ No newline at end of file diff --git a/midi_meta/Pipfile b/midi_meta/Pipfile new file mode 100644 index 0000000..18f573e --- /dev/null +++ b/midi_meta/Pipfile @@ -0,0 +1,14 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +openai = "*" +ollama = "*" +mido = "*" + +[dev-packages] + +[requires] +python_version = "3.12" diff --git a/midi_meta/Pipfile.lock b/midi_meta/Pipfile.lock new file mode 100644 index 0000000..c2da20f --- /dev/null +++ 
b/midi_meta/Pipfile.lock @@ -0,0 +1,347 @@ +{ + "_meta": { + "hash": { + "sha256": "cd4a006ed23a1a39e8c631330a4a67ab5be34cdc2e1f7c6e772feab81804393d" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.12" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "annotated-types": { + "hashes": [ + "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", + "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89" + ], + "markers": "python_version >= '3.8'", + "version": "==0.7.0" + }, + "anyio": { + "hashes": [ + "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", + "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c" + ], + "markers": "python_version >= '3.9'", + "version": "==4.9.0" + }, + "certifi": { + "hashes": [ + "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", + "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3" + ], + "markers": "python_version >= '3.6'", + "version": "==2025.4.26" + }, + "distro": { + "hashes": [ + "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", + "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2" + ], + "markers": "python_version >= '3.6'", + "version": "==1.9.0" + }, + "h11": { + "hashes": [ + "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", + "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" + ], + "markers": "python_version >= '3.8'", + "version": "==0.16.0" + }, + "httpcore": { + "hashes": [ + "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", + "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.9" + }, + "httpx": { + "hashes": [ + "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", + 
"sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad" + ], + "markers": "python_version >= '3.8'", + "version": "==0.28.1" + }, + "idna": { + "hashes": [ + "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", + "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" + ], + "markers": "python_version >= '3.6'", + "version": "==3.10" + }, + "jiter": { + "hashes": [ + "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d", + "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a", + "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0", + "sha256:0c058ecb51763a67f019ae423b1cbe3fa90f7ee6280c31a1baa6ccc0c0e2d06e", + "sha256:113f30f87fb1f412510c6d7ed13e91422cfd329436364a690c34c8b8bd880c42", + "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4", + "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51", + "sha256:1537a890724ba00fdba21787010ac6f24dad47f763410e9e1093277913592784", + "sha256:161d461dcbe658cf0bd0aa375b30a968b087cdddc624fc585f3867c63c6eca95", + "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3", + "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5", + "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42", + "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5", + "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635", + "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc", + "sha256:2685f44bf80e95f8910553bf2d33b9c87bf25fceae6e9f0c1355f75d2922b0ee", + "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7", + "sha256:27cd1f2e8bb377f31d3190b34e4328d280325ad7ef55c6ac9abde72f79e84d2e", + "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75", + "sha256:351f4c90a24c4fb8c87c6a73af2944c440494ed2bea2094feecacb75c50398ae", + 
"sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b", + "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572", + "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d", + "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69", + "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965", + "sha256:4a2d16360d0642cd68236f931b85fe50288834c383492e4279d9f1792e309571", + "sha256:4feafe787eb8a8d98168ab15637ca2577f6ddf77ac6c8c66242c2d028aa5420e", + "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", + "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e", + "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d", + "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b", + "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e", + "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06", + "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5", + "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", + "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678", + "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", + "sha256:7825f46e50646bee937e0f849d14ef3a417910966136f59cd1eb848b8b5bb3e4", + "sha256:7a9aaa5102dba4e079bb728076fadd5a2dca94c05c04ce68004cfd96f128ea34", + "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11", + "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad", + "sha256:8793b6df019b988526f5a633fdc7456ea75e4a79bd8396a3373c371fc59f5c9b", + "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", + "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd", + "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708", + "sha256:9897115ad716c48f0120c1f0c4efae348ec47037319a6c63b2d7838bb53aaef4", 
+ "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea", + "sha256:9ef340fae98065071ccd5805fe81c99c8f80484e820e40043689cf97fb66b3e2", + "sha256:9f3c848209ccd1bfa344a1240763975ca917de753c7875c77ec3034f4151d06c", + "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d", + "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103", + "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2", + "sha256:a7954a401d0a8a0b8bc669199db78af435aae1e3569187c2939c477c53cb6a0a", + "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", + "sha256:c0194f813efdf4b8865ad5f5c5f50f8566df7d770a82c51ef593d09e0b347020", + "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322", + "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7", + "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15", + "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4", + "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51", + "sha256:d45807b0f236c485e1e525e2ce3a854807dfe28ccf0d013dd4a563395e28008a", + "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2", + "sha256:d82a811928b26d1a6311a886b2566f68ccf2b23cf3bfed042e18686f1f22c2d7", + "sha256:d838650f6ebaf4ccadfb04522463e74a4c378d7e667e0eb1865cfe3990bfac49", + "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043", + "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12", + "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d", + "sha256:e3630ec20cbeaddd4b65513fa3857e1b7c4190d4481ef07fb63d0fad59033321", + "sha256:e84ed1c9c9ec10bbb8c37f450077cbe3c0d4e8c2b19f0a49a60ac7ace73c7452", + "sha256:e8b36d8a16a61993be33e75126ad3d8aa29cf450b09576f3c427d27647fcb4aa", + "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419", + 
"sha256:efb767d92c63b2cd9ec9f24feeb48f49574a713870ec87e9ba0c2c6e9329c3e2", + "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001", + "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53", + "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc", + "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538" + ], + "markers": "python_version >= '3.8'", + "version": "==0.9.0" + }, + "mido": { + "hashes": [ + "sha256:01033c9b10b049e4436fca2762194ca839b09a4334091dd3c34e7f4ae674fd8a", + "sha256:1aecb30b7f282404f17e43768cbf74a6a31bf22b3b783bdd117a1ce9d22cb74c" + ], + "index": "pypi", + "markers": "python_version ~= '3.7'", + "version": "==1.3.3" + }, + "ollama": { + "hashes": [ + "sha256:04312af2c5e72449aaebac4a2776f52ef010877c554103419d3f36066fe8af4c", + "sha256:1121439d49b96fa8339842965d0616eba5deb9f8c790786cdf4c0b3df4833802" + ], + "index": "pypi", + "markers": "python_version >= '3.8' and python_version < '4.0'", + "version": "==0.4.8" + }, + "openai": { + "hashes": [ + "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", + "sha256:fd2bfaf4608f48102d6b74f9e11c5ecaa058b60dad9c36e409c12477dfd91fb2" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.76.0" + }, + "packaging": { + "hashes": [ + "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", + "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + ], + "markers": "python_version >= '3.8'", + "version": "==25.0" + }, + "pydantic": { + "hashes": [ + "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3", + "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f" + ], + "markers": "python_version >= '3.9'", + "version": "==2.11.3" + }, + "pydantic-core": { + "hashes": [ + "sha256:0483847fa9ad5e3412265c1bd72aad35235512d9ce9d27d81a56d935ef489672", + 
"sha256:048831bd363490be79acdd3232f74a0e9951b11b2b4cc058aeb72b22fdc3abe1", + "sha256:048c01eee07d37cbd066fc512b9d8b5ea88ceeb4e629ab94b3e56965ad655add", + "sha256:049e0de24cf23766f12cc5cc71d8abc07d4a9deb9061b334b62093dedc7cb068", + "sha256:08530b8ac922003033f399128505f513e30ca770527cc8bbacf75a84fcc2c74b", + "sha256:0fb935c5591573ae3201640579f30128ccc10739b45663f93c06796854405505", + "sha256:1293d7febb995e9d3ec3ea09caf1a26214eec45b0f29f6074abb004723fc1de8", + "sha256:177d50460bc976a0369920b6c744d927b0ecb8606fb56858ff542560251b19e5", + "sha256:1a28239037b3d6f16916a4c831a5a0eadf856bdd6d2e92c10a0da3a59eadcf3e", + "sha256:1b30d92c9412beb5ac6b10a3eb7ef92ccb14e3f2a8d7732e2d739f58b3aa7544", + "sha256:1c607801d85e2e123357b3893f82c97a42856192997b95b4d8325deb1cd0c5f4", + "sha256:1d20eb4861329bb2484c021b9d9a977566ab16d84000a57e28061151c62b349a", + "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a", + "sha256:25626fb37b3c543818c14821afe0fd3830bc327a43953bc88db924b68c5723f1", + "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266", + "sha256:2ea62419ba8c397e7da28a9170a16219d310d2cf4970dbc65c32faf20d828c83", + "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764", + "sha256:2f9284e11c751b003fd4215ad92d325d92c9cb19ee6729ebd87e3250072cdcde", + "sha256:3077cfdb6125cc8dab61b155fdd714663e401f0e6883f9632118ec12cf42df26", + "sha256:32cd11c5914d1179df70406427097c7dcde19fddf1418c787540f4b730289896", + "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18", + "sha256:35a5ec3fa8c2fe6c53e1b2ccc2454398f95d5393ab398478f53e1afbbeb4d939", + "sha256:398a38d323f37714023be1e0285765f0a27243a8b1506b7b7de87b647b517e48", + "sha256:3a371dc00282c4b84246509a5ddc808e61b9864aa1eae9ecc92bb1268b82db4a", + "sha256:3a64e81e8cba118e108d7126362ea30e021291b7805d47e4896e52c791be2761", + "sha256:3ab2d36e20fbfcce8f02d73c33a8a7362980cff717926bbae030b93ae46b56c7", + "sha256:3f1fdb790440a34f6ecf7679e1863b825cb5ffde858a9197f851168ed08371e5", 
+ "sha256:3f2648b9262607a7fb41d782cc263b48032ff7a03a835581abbf7a3bec62bcf5", + "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d", + "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e", + "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3", + "sha256:5183e4f6a2d468787243ebcd70cf4098c247e60d73fb7d68d5bc1e1beaa0c4db", + "sha256:5277aec8d879f8d05168fdd17ae811dd313b8ff894aeeaf7cd34ad28b4d77e33", + "sha256:52928d8c1b6bda03cc6d811e8923dffc87a2d3c8b3bfd2ce16471c7147a24850", + "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde", + "sha256:5773da0ee2d17136b1f1c6fbde543398d452a6ad2a7b54ea1033e2daa739b8d2", + "sha256:5ab77f45d33d264de66e1884fca158bc920cb5e27fd0764a72f72f5756ae8bdb", + "sha256:5c834f54f8f4640fd7e4b193f80eb25a0602bba9e19b3cd2fc7ffe8199f5ae02", + "sha256:5ccd429694cf26af7997595d627dd2637e7932214486f55b8a357edaac9dae8c", + "sha256:681d65e9011f7392db5aa002b7423cc442d6a673c635668c227c6c8d0e5a4f77", + "sha256:694ad99a7f6718c1a498dc170ca430687a39894a60327f548e02a9c7ee4b6504", + "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516", + "sha256:6e966fc3caaf9f1d96b349b0341c70c8d6573bf1bac7261f7b0ba88f96c56c24", + "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a", + "sha256:723c5630c4259400818b4ad096735a829074601805d07f8cafc366d95786d331", + "sha256:7965c13b3967909a09ecc91f21d09cfc4576bf78140b988904e94f130f188396", + "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c", + "sha256:7edbc454a29fc6aeae1e1eecba4f07b63b8d76e76a748532233c4c167b4cb9ea", + "sha256:7fb66263e9ba8fea2aa85e1e5578980d127fb37d7f2e292773e7bc3a38fb0c7b", + "sha256:87d3776f0001b43acebfa86f8c64019c043b55cc5a6a2e313d728b5c95b46969", + "sha256:8ab581d3530611897d863d1a649fb0644b860286b4718db919bfd51ece41f10b", + "sha256:8d13f0276806ee722e70a1c93da19748594f19ac4299c7e41237fc791d1861ea", + 
"sha256:8ffab8b2908d152e74862d276cf5017c81a2f3719f14e8e3e8d6b83fda863927", + "sha256:902dbc832141aa0ec374f4310f1e4e7febeebc3256f00dc359a9ac3f264a45dc", + "sha256:9097b9f17f91eea659b9ec58148c0747ec354a42f7389b9d50701610d86f812e", + "sha256:91815221101ad3c6b507804178a7bb5cb7b2ead9ecd600041669c8d805ebd595", + "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d", + "sha256:99b56acd433386c8f20be5c4000786d1e7ca0523c8eefc995d14d79c7a081498", + "sha256:9d3da303ab5f378a268fa7d45f37d7d85c3ec19769f28d2cc0c61826a8de21fe", + "sha256:9f466e8bf0a62dc43e068c12166281c2eca72121dd2adc1040f3aa1e21ef8599", + "sha256:9fea9c1869bb4742d174a57b4700c6dadea951df8b06de40c2fedb4f02931c2e", + "sha256:a0d5f3acc81452c56895e90643a625302bd6be351e7010664151cc55b7b97f89", + "sha256:a3edde68d1a1f9af1273b2fe798997b33f90308fb6d44d8550c89fc6a3647cf6", + "sha256:a62c3c3ef6a7e2c45f7853b10b5bc4ddefd6ee3cd31024754a1a5842da7d598d", + "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523", + "sha256:ab0277cedb698749caada82e5d099dc9fed3f906a30d4c382d1a21725777a1e5", + "sha256:ad05b683963f69a1d5d2c2bdab1274a31221ca737dbbceaa32bcb67359453cdd", + "sha256:b172f7b9d2f3abc0efd12e3386f7e48b576ef309544ac3a63e5e9cdd2e24585d", + "sha256:b1caa0bc2741b043db7823843e1bde8aaa58a55a58fda06083b0569f8b45693a", + "sha256:bae370459da6a5466978c0eacf90690cb57ec9d533f8e63e564ef3822bfa04fe", + "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df", + "sha256:bdc84017d28459c00db6f918a7272a5190bec3090058334e43a76afb279eac7c", + "sha256:bfd0adeee563d59c598ceabddf2c92eec77abcb3f4a391b19aa7366170bd9e30", + "sha256:c566dd9c5f63d22226409553531f89de0cac55397f2ab8d97d6f06cfce6d947e", + "sha256:c91dbb0ab683fa0cd64a6e81907c8ff41d6497c346890e26b23de7ee55353f96", + "sha256:c964fd24e6166420d18fb53996d8c9fd6eac9bf5ae3ec3d03015be4414ce497f", + "sha256:cc77ec5b7e2118b152b0d886c7514a4653bcb58c6b1d760134a9fab915f777b3", + "sha256:d100e3ae783d2167782391e0c1c7a20a31f55f8015f3293647544df3f9c67824", 
+ "sha256:d3a07fadec2a13274a8d861d3d37c61e97a816beae717efccaa4b36dfcaadcde", + "sha256:d5e3d15245b08fa4a84cefc6c9222e6f37c98111c8679fbd94aa145f9a0ae23d", + "sha256:de9e06abe3cc5ec6a2d5f75bc99b0bdca4f5c719a5b34026f8c57efbdecd2ee3", + "sha256:df6a94bf9452c6da9b5d76ed229a5683d0306ccb91cca8e1eea883189780d568", + "sha256:e100c52f7355a48413e2999bfb4e139d2977a904495441b374f3d4fb4a170961", + "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4", + "sha256:e14f369c98a7c15772b9da98987f58e2b509a93235582838bd0d1d8c08b68fda", + "sha256:e3de2777e3b9f4d603112f78006f4ae0acb936e95f06da6cb1a45fbad6bdb4b5", + "sha256:e7aaba1b4b03aaea7bb59e1b5856d734be011d3e6d98f5bcaa98cb30f375f2ad", + "sha256:ec259f62538e8bf364903a7d0d0239447059f9434b284f5536e8402b7dd198db", + "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd", + "sha256:ed3eb16d51257c763539bde21e011092f127a2202692afaeaccb50db55a31383", + "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40", + "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f", + "sha256:ef99779001d7ac2e2461d8ab55d3373fe7315caefdbecd8ced75304ae5a6fc6b", + "sha256:f59295ecc75a1788af8ba92f2e8c6eeaa5a94c22fc4d151e8d9638814f85c8fc", + "sha256:f995719707e0e29f0f41a8aa3bcea6e761a36c9136104d3189eafb83f5cec5e5", + "sha256:f99aeda58dce827f76963ee87a0ebe75e648c72ff9ba1174a253f6744f518f65", + "sha256:fc6bf8869e193855e8d91d91f6bf59699a5cdfaa47a404e278e776dd7f168b39", + "sha256:fc903512177361e868bc1f5b80ac8c8a6e05fcdd574a5fb5ffeac5a9982b9e89", + "sha256:fe44d56aa0b00d66640aa84a3cbe80b7a3ccdc6f0b1ca71090696a6d4777c091" + ], + "markers": "python_version >= '3.9'", + "version": "==2.33.1" + }, + "sniffio": { + "hashes": [ + "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", + "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.1" + }, + "tqdm": { + "hashes": [ + 
"sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", + "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" + ], + "markers": "python_version >= '3.7'", + "version": "==4.67.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", + "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef" + ], + "markers": "python_version >= '3.8'", + "version": "==4.13.2" + }, + "typing-inspection": { + "hashes": [ + "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", + "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122" + ], + "markers": "python_version >= '3.9'", + "version": "==0.4.0" + } + }, + "develop": {} +} diff --git a/midi_meta/README.md b/midi_meta/README.md new file mode 100644 index 0000000..5f7482c --- /dev/null +++ b/midi_meta/README.md @@ -0,0 +1,40 @@ + +### Setup + +``` +$ PATH="/Users/martyross/.local/bin:$PATH" +``` + + +### Orchestrations + +#### Read the Metadata + +```shell +$ python meta_reader.py ~/repos/noodnik2/mzb/samples/src/240818-nuvi/gs/l100* --output ~/tmp/meta_reader_l100.json +``` + +#### Add the LMM Metadata + +```shell +$ python main.py --model-type openai --model-name gpt-4 ~/tmp/meta_reader_l100.json ~/tmp/lmm_output_l100.json +``` + +### Update the Database + +```shell +$ python fmm.py --update < ~/tmp/lmm_output_l100.json +``` + +### Render the `.m4a` from the `.mid` + +... + +### Produce the `exiftool` Commands + +Needs some work to select the files and link them to the new `.m4a` rendered versions. + +```shell +$ python fmm.py --query l1001_04.mid l1002_02.mid ... 
+``` + diff --git a/midi_meta/env-template b/midi_meta/env-template new file mode 100644 index 0000000..e570b8b --- /dev/null +++ b/midi_meta/env-template @@ -0,0 +1 @@ +OPENAI_API_KEY= diff --git a/midi_meta/file_metadata.db b/midi_meta/file_metadata.db new file mode 100644 index 0000000..3e6d45b Binary files /dev/null and b/midi_meta/file_metadata.db differ diff --git a/midi_meta/fmm.py b/midi_meta/fmm.py new file mode 100644 index 0000000..a66aec7 --- /dev/null +++ b/midi_meta/fmm.py @@ -0,0 +1,679 @@ +import json +import sqlite3 +from typing import Dict, List, Any, Optional + + +class FileMetadataManager: + """ + A class to manage file metadata using SQLite with JSON1 extension. + Provides methods to store, update, query, and delete metadata tags for files. + """ + + def __init__(self, db_path: str = "file_metadata.db"): + """ + Initialize the FileMetadataManager with a database path. + + Args: + db_path: Path to the SQLite database file + """ + self.db_path = db_path + self._initialize_db() + + def _initialize_db(self) -> None: + """Initialize the database with required tables if they don't exist.""" + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Enable foreign keys + cursor.execute("PRAGMA foreign_keys = ON") + + # Create files table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_path TEXT UNIQUE NOT NULL, + file_name TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """) + + # Create metadata table with JSON column for tags + cursor.execute(""" + CREATE TABLE IF NOT EXISTS metadata ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_id INTEGER NOT NULL, + tags JSON NOT NULL, + source TEXT NOT NULL, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE + ) + """) + + # Create indexes for better performance + cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(file_path)") 
+ cursor.execute("CREATE INDEX IF NOT EXISTS idx_metadata_file_id ON metadata(file_id)") + + conn.commit() + + def add_file(self, file_path: str) -> int: + """ + Add a file to the database if it doesn't exist, or update its timestamp if it does. + + Args: + file_path: Full path to the file + + Returns: + file_id: ID of the file record + """ + file_name = os.path.basename(file_path) + now = datetime.now().isoformat() + + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Check if file already exists + cursor.execute("SELECT id FROM files WHERE file_path = ?", (file_path,)) + result = cursor.fetchone() + + if result: + file_id = result[0] + cursor.execute( + "UPDATE files SET updated_at = ? WHERE id = ?", + (now, file_id) + ) + else: + cursor.execute( + "INSERT INTO files (file_path, file_name, created_at, updated_at) VALUES (?, ?, ?, ?)", + (file_path, file_name, now, now) + ) + file_id = cursor.lastrowid + + conn.commit() + return file_id + + def add_metadata(self, file_path: str, tags: Dict[str, Any], source: str) -> int: + """ + Add metadata tags for a file from a specific source. + + Args: + file_path: Path to the file + tags: Dictionary of metadata tags + source: Identifier for the metadata source (e.g., 'exif', 'analysis', 'user') + + Returns: + metadata_id: ID of the created metadata record + """ + file_id = self.add_file(file_path) + now = datetime.now().isoformat() + + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Check if metadata for this file/source combination already exists + cursor.execute( + "SELECT id FROM metadata WHERE file_id = ? AND source = ?", + (file_id, source) + ) + result = cursor.fetchone() + + if result: + metadata_id = result[0] + cursor.execute( + "UPDATE metadata SET tags = json(?), updated_at = ? 
WHERE id = ?", + (json.dumps(tags), now, metadata_id) + ) + else: + cursor.execute( + "INSERT INTO metadata (file_id, tags, source, created_at, updated_at) VALUES (?, json(?), ?, ?, ?)", + (file_id, json.dumps(tags), source, now, now) + ) + metadata_id = cursor.lastrowid + + conn.commit() + return metadata_id + + def update_metadata(self, file_path: str, tags: Dict[str, Any], source: str) -> bool: + """ + Update existing metadata tags for a file from a specific source. + + Args: + file_path: Path to the file + tags: Dictionary of metadata tags to update (will be merged with existing) + source: Identifier for the metadata source + + Returns: + bool: True if metadata was updated, False if no matching record found + """ + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Get file_id + cursor.execute("SELECT id FROM files WHERE file_path = ?", (file_path,)) + file_result = cursor.fetchone() + + if not file_result: + return False + + file_id = file_result[0] + + # Get current metadata + cursor.execute( + "SELECT id, tags FROM metadata WHERE file_id = ? AND source = ?", + (file_id, source) + ) + metadata_result = cursor.fetchone() + + if not metadata_result: + return False + + metadata_id, current_tags_json = metadata_result + current_tags = json.loads(current_tags_json) + + # Merge tags + merged_tags = {**current_tags, **tags} + now = datetime.now().isoformat() + + # Update + cursor.execute( + "UPDATE metadata SET tags = json(?), updated_at = ? WHERE id = ?", + (json.dumps(merged_tags), now, metadata_id) + ) + + conn.commit() + return cursor.rowcount > 0 + + def get_metadata(self, file_path: str, source: Optional[str] = None) -> Dict[str, Any]: + """ + Get metadata for a file, optionally filtered by source. 
+ + Args: + file_path: Path to the file + source: Optional source filter + + Returns: + Dict with metadata from all sources or the specified source + """ + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + # Get file_id + cursor.execute("SELECT id FROM files WHERE file_path = ?", (file_path,)) + file_result = cursor.fetchone() + + if not file_result: + return {} + + file_id = file_result["id"] + + # Query metadata + if source: + cursor.execute( + "SELECT source, tags FROM metadata WHERE file_id = ? AND source = ?", + (file_id, source) + ) + else: + cursor.execute( + "SELECT source, tags FROM metadata WHERE file_id = ?", + (file_id,) + ) + + results = cursor.fetchall() + + if not results: + return {} + + # If specific source requested, return just the tags + if source and results: + return json.loads(results[0]["tags"]) + + # Otherwise return dict of {source: tags} + metadata = {} + for row in results: + metadata[row["source"]] = json.loads(row["tags"]) + + return metadata + + def lookup_lmm_values(self, filename: str) -> Dict[str, Any]: + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + query = """ + SELECT DISTINCT json_extract(m.tags, '$.lmm_values') + FROM files f + JOIN metadata m ON f.id = m.file_id + WHERE json_extract(m.tags, '$.filename') = ? + AND json_extract(m.tags, '$.lmm_values') IS NOT NULL + """ + + params = [filename] + + cursor.execute(query, params) + results = cursor.fetchall() + n_results = len(results) + if n_results > 1: + raise ValueError("too many results returned") + + if n_results == 0: + return dict() + + (lmm_json,) = results[0] + return json.loads(lmm_json) + + def find_files_by_tag(self, tag_path: str, value: Any = None, source: Optional[str] = None) -> List[str]: + """ + Find files by a specific tag path and optional value. + Uses JSON1 path extraction to query inside the JSON structure. 
+ + Args: + tag_path: JSON path to the tag (e.g., '$.dimensions.width') + value: Optional value to match + source: Optional source to restrict search to + + Returns: + List of file paths matching the criteria + """ + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + query = """ + SELECT DISTINCT f.file_path + FROM files f + JOIN metadata m ON f.id = m.file_id + WHERE json_extract(m.tags, ?) {predicate} + """ + + params = [tag_path] + + # Add value comparison if provided + if value is not None: + if isinstance(value, (int, float, bool, str)): + predicate = "= ?" + else: + predicate = "= json(?)" + value = json.dumps(value) + params.append(value) + else: + predicate = "IS NOT NULL" + + # Add source filter if provided + if source: + query += " AND m.source = ?" + params.append(source) + + # Replace the predicate placeholder + query = query.format(predicate=predicate) + + cursor.execute(query, params) + results = cursor.fetchall() + + return [row[0] for row in results] + + def delete_metadata(self, file_path: str, source: Optional[str] = None) -> bool: + """ + Delete metadata for a file, optionally filtered by source. + + Args: + file_path: Path to the file + source: Optional source to delete (if None, deletes all metadata for the file) + + Returns: + bool: True if any metadata was deleted + """ + with sqlite3.connect(self.db_path) as conn: + cursor = conn.cursor() + + # Get file_id + cursor.execute("SELECT id FROM files WHERE file_path = ?", (file_path,)) + file_result = cursor.fetchone() + + if not file_result: + return False + + file_id = file_result[0] + + # Delete metadata + if source: + cursor.execute( + "DELETE FROM metadata WHERE file_id = ? AND source = ?", + (file_id, source) + ) + else: + cursor.execute("DELETE FROM metadata WHERE file_id = ?", (file_id,)) + + conn.commit() + return cursor.rowcount > 0 + + def delete_file(self, file_path: str) -> bool: + """ + Delete a file and all its associated metadata from the database. 
def search_metadata(self, query: Dict[str, Any]) -> List[str]:
    """
    Search for files that have a metadata row matching every criterion.

    All criteria are ANDed against the same metadata row. An empty query
    matches every file that has at least one metadata row.

    Args:
        query: Dictionary of search criteria where:
            - Keys are tag paths; a key not starting with '$' is treated as
              dot notation and prefixed with '$.' to form a JSON path
            - Values are the values to match; scalars are compared
              directly, anything else is compared as serialized JSON

    Returns:
        Distinct file paths matching all criteria, ordered by path
    """
    conditions = []
    params: List[Any] = []

    for tag_path, value in query.items():
        # Convert dot notation to a JSON path if needed
        json_path = tag_path if tag_path.startswith('$') else '$.' + tag_path

        if isinstance(value, (int, float, bool, str)):
            conditions.append("json_extract(m.tags, ?) = ?")
        else:
            # json() normalizes the serialized text so it compares equal to
            # what json_extract returns for objects and arrays
            conditions.append("json_extract(m.tags, ?) = json(?)")
            value = json.dumps(value)

        params.extend([json_path, value])

    # DISTINCT replaces the former GROUP BY ... HAVING COUNT(DISTINCT m.id) >= N:
    # that clause counted matching metadata rows, not satisfied criteria, so a
    # multi-criterion query against a single metadata row never matched.
    sql = (
        "SELECT DISTINCT f.file_path "
        "FROM files f "
        "JOIN metadata m ON f.id = m.file_id "
        "WHERE 1=1"
    )
    for condition in conditions:
        sql += " AND " + condition
    sql += " ORDER BY f.file_path"

    with sqlite3.connect(self.db_path) as conn:
        cursor = conn.cursor()
        cursor.execute(sql, params)
        return [row[0] for row in cursor.fetchall()]
def dump_database(db_name: str):
    """
    Print every file row and all of its metadata entries to stdout.

    Args:
        db_name: Path to the SQLite database file to dump
    """
    with sqlite3.connect(db_name) as conn:
        # Row objects allow column access by name below
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute("""
            SELECT f.id, f.file_path, f.file_name, f.created_at, f.updated_at
            FROM files f
            ORDER BY f.file_path
        """)
        file_rows = cursor.fetchall()
        print(f"\nFound {len(file_rows)} files in database:")

        for file in file_rows:
            # File header, one attribute per line
            print("\n".join([
                f"\n=== File: {file['file_path']} ===",
                f" ID: {file['id']}",
                f" Name: {file['file_name']}",
                f" Created: {file['created_at']}",
                f" Updated: {file['updated_at']}",
            ]))

            # All metadata rows attached to this file, grouped by source
            cursor.execute("""
                SELECT m.source, m.tags, m.created_at, m.updated_at
                FROM metadata m
                WHERE m.file_id = ?
                ORDER BY m.source
            """, (file['id'],))
            metadata_entries = cursor.fetchall()
            print(f" Metadata entries: {len(metadata_entries)}")

            for entry in metadata_entries:
                print(f"\n -- Source: {entry['source']} --")
                print(f" Created: {entry['created_at']}")
                print(f" Updated: {entry['updated_at']}")

                # Tags are stored as JSON text; nested objects are expanded one level
                for tag_key, tag_value in json.loads(entry['tags']).items():
                    if not isinstance(tag_value, dict):
                        print(f" {tag_key}: {tag_value}")
                        continue
                    print(f" {tag_key}:")
                    for sub_key, sub_value in tag_value.items():
                        print(f" {sub_key}: {sub_value}")
def gen_exiftool_command(filename: str, metadata: Dict[str, Any]):
    """
    Build a shell-safe ``exiftool`` command line that applies *metadata*.

    Only fields with an exiftool mapping and a confidence of at least 0.7
    are included. Fields without a mapping, entries that are not
    ``{"text": ..., "confidence": ...}`` dictionaries, and entries whose
    confidence is missing or non-numeric are skipped instead of raising.

    Args:
        filename: File the command should operate on
        metadata: Mapping of field name -> {"text": value, "confidence": score}

    Returns:
        The command as a single string, with every argument shell-quoted
    """
    # Local import: the surrounding module is not known to import shlex
    import shlex

    exiftool_metadata_names = {
        "title": "Title",
        "artist": "Artist",
        "album": "Album",
        "genre": "Genre",
        "year": "Year",
        "comment": "Comment",
        "copyright": "Copyright",
    }
    min_confidence = 0.7

    parts = ["exiftool"]
    for key, value in (metadata or {}).items():
        tag_name = exiftool_metadata_names.get(key)
        if tag_name is None or not isinstance(value, dict) or "text" not in value:
            continue

        try:
            confidence = float(value.get("confidence", 0.0))
        except (TypeError, ValueError):
            continue
        if confidence < min_confidence:
            continue

        # Quote the whole -Tag=value argument; repr() is Python quoting, not
        # shell quoting, and breaks on values mixing quotes or backslashes
        parts.append(shlex.quote(f"-{tag_name}={value['text']}"))

    parts.append(shlex.quote(filename))
    return ' '.join(parts)
class MidiMetadataExtractor:
    """
    Derive song-level metadata (artist, title, genre, year, copyright,
    comment) from raw MIDI metadata tags by prompting an LLM.
    """

    # Fields requested from the LLM; also drives the low-confidence fallback
    _FIELDS = ("artist", "title", "genre", "year", "copyright", "comment")

    def __init__(self, model_type: str = "openai", model_name: str = "gpt-4") -> None:
        """
        Initialize the MIDI metadata extractor with the specified model.

        Args:
            model_type: Type of model to use ('openai' or 'ollama')
            model_name: Specific model name to use

        Raises:
            ImportError: If the package for the chosen provider is missing
            ValueError: If the model type is unsupported or the OpenAI key is unset
        """
        self.model_type = model_type.lower()
        self.model_name = model_name
        self.client = self._setup_client()

    def _setup_client(self) -> Any:
        """Set up the appropriate LLM client based on configuration."""
        if self.model_type == "openai":
            if not OPENAI_AVAILABLE:
                raise ImportError("OpenAI package is not installed. Install with 'pip install openai'")
            api_key = os.environ.get("OPENAI_API_KEY")
            if not api_key:
                raise ValueError("OPENAI_API_KEY environment variable is not set")
            return openai.OpenAI(api_key=api_key)

        if self.model_type == "ollama":
            if not OLLAMA_AVAILABLE:
                raise ImportError("Ollama package is not installed. Install with 'pip install ollama'")
            return OllamaClient()

        raise ValueError(f"Unsupported model type: {self.model_type}")

    def _unknown_result(self) -> Dict[str, Dict[str, Any]]:
        """Return the default structure used when the LLM answer is unusable."""
        return {field: {"text": "Unknown", "confidence": 0.0} for field in self._FIELDS}

    def _analyze_with_llm(self, metadata_tags: List[str]) -> Dict[str, Dict[str, str]]:
        """
        Analyze metadata tags using the configured LLM.

        Args:
            metadata_tags: List of metadata strings from the MIDI file

        Returns:
            Dictionary with extracted metadata fields and confidence scores.
            If the response is empty, is not valid JSON, or is not a JSON
            object, every field is "Unknown" with confidence 0.0.

        Raises:
            ValueError: If the configured model type is unsupported
        """
        prompt = f"""
        Analyze these MIDI file metadata tags and extract the most likely values for:
        1. Artist name
        2. Song title
        3. Song genre
        4. Publication year
        5. Copyright information
        6. An interesting comment or note

        For each field, provide your best guess and a confidence percentage (a number between 0 and 1).

        Metadata tags:
        {json.dumps(metadata_tags, indent=2)}

        Return only a JSON object with this exact structure:
        {{
            "artist": {{"text": "extracted artist", "confidence": 0.0}},
            "title": {{"text": "extracted title", "confidence": 0.0}},
            "genre": {{"text": "extracted genre", "confidence": 0.0}},
            "year": {{"text": "extracted year", "confidence": 0.0}},
            "copyright": {{"text": "extracted copyright", "confidence": 0.0}},
            "comment": {{"text": "extracted comment", "confidence": 0.0}}
        }}
        """

        if self.model_type == "openai":
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1
            )
            result_text = response.choices[0].message.content
        elif self.model_type == "ollama":
            response = self.client.generate(model=self.model_name, prompt=prompt)
            result_text = response['response']
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

        # The provider may hand back no content at all; treat that as empty text
        result_text = result_text or ""

        # Find the JSON object in the response (in case there's extra text)
        flattened = result_text.replace('\n', ' ')
        json_match = re.search(r'({.*})', flattened, re.DOTALL)
        candidate = json_match.group(1) if json_match else flattened

        try:
            result = json.loads(candidate)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM response as JSON: {e}")
            logger.debug(f"Raw response: {result_text}")
            return self._unknown_result()

        # Callers index the result by field name, so only an object is usable
        if not isinstance(result, dict):
            logger.error(f"Failed to parse LLM response as JSON: expected an object, got {type(result).__name__}")
            return self._unknown_result()

        return result

    def _clean_metadata_text(self, text: str) -> str:
        """
        Clean up metadata text by removing null bytes and other problematic characters.

        Both real NUL characters and the literal, textual escape sequences
        (a backslash followed by ``x00``, ``xc9``, ...) are removed.

        Args:
            text: Raw metadata text

        Returns:
            Cleaned metadata text with surrounding whitespace stripped
        """
        # Real NUL bytes, then escape sequences that arrived as literal text
        text = text.replace('\x00', '')
        for esc in ['\\x00', '\\xc9', '\\xcd', '\\xba', '\\xc8']:
            text = text.replace(esc, '')

        return text.strip()

    def _extract_tag_values(self, metadata_tags: List[str]) -> List[str]:
        """
        Extract actual values from metadata tags, cleaning them in the process.

        Args:
            metadata_tags: List of raw metadata tag strings

        Returns:
            List of cleaned, non-empty values taken from the first
            ``text: "..."`` occurrence in each tag
        """
        values = []
        for tag in metadata_tags:
            match = re.search(r'text: "(.*?)"', tag)
            if not match:
                continue
            cleaned_value = self._clean_metadata_text(match.group(1))
            if cleaned_value:
                values.append(cleaned_value)
        return values

    def process_midi_metadata(self, input_file: str, output_file: str) -> None:
        """
        Process MIDI metadata from an input JSON file and write results to an output file.

        Args:
            input_file: Path to the input JSON file (mapping of file name -> tag list)
            output_file: Path to write the output JSON file

        Raises:
            Exception: Any error is logged and re-raised unchanged
        """
        try:
            with open(input_file, 'r', encoding='utf-8') as f:
                midi_data = json.load(f)

            results = {}
            total_files = len(midi_data)

            for i, (filename, metadata_tags) in enumerate(midi_data.items(), 1):
                logger.info(f"Processing file {i}/{total_files}: {filename}")

                # NOTE(review): the raw tags are sent to the LLM, as before. The
                # result of _extract_tag_values() used to be computed here and
                # discarded; confirm whether the cleaned values were meant to be
                # sent instead.
                results[filename] = self._analyze_with_llm(metadata_tags)

            # Write results to output file
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2)

            logger.info(f"Results written to {output_file}")

        except Exception as e:
            logger.error(f"Error processing MIDI metadata: {e}")
            raise
def main():
    """
    Command-line entry point for the extract / write / workflow commands.

    ``--midi-dir`` is only consumed by ``write`` and ``workflow``, so it is
    validated after parsing instead of being required globally; ``extract``
    and a bare invocation (which prints the help text) work without it.
    """
    parser = argparse.ArgumentParser(description='Extract and write metadata for MIDI files')

    # Common arguments
    parser.add_argument('--midi-dir', help='Directory containing the MIDI files')

    # Create subparsers for different operations
    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    def add_model_arguments(sub):
        # Options shared by every command that talks to the LLM
        sub.add_argument('--model-type', default='openai', choices=['openai', 'ollama'],
                         help='Type of LLM to use (default: openai)')
        sub.add_argument('--model-name', default='gpt-4',
                         help='Name of the specific model to use (default: gpt-4)')

    # Extract command
    extract_parser = subparsers.add_parser('extract', help='Extract metadata from MIDI files')
    extract_parser.add_argument('--input-json', required=True,
                                help='Path to the input JSON file containing MIDI metadata tags')
    extract_parser.add_argument('--output-json', required=True,
                                help='Path to write the extracted metadata JSON file')
    add_model_arguments(extract_parser)

    # Write command
    write_parser = subparsers.add_parser('write', help='Write metadata to MIDI files')
    write_parser.add_argument('--metadata-json', required=True,
                              help='Path to JSON file containing metadata to write')
    write_parser.add_argument('--output-dir',
                              help='Directory to write modified MIDI files (if omitted, overwrites originals)')

    # Full workflow command
    workflow_parser = subparsers.add_parser('workflow', help='Run full extraction and writing workflow')
    workflow_parser.add_argument('--input-json', required=True,
                                 help='Path to the input JSON file containing MIDI metadata tags')
    workflow_parser.add_argument('--metadata-json', required=True,
                                 help='Path to write the intermediate metadata JSON file')
    workflow_parser.add_argument('--output-dir',
                                 help='Directory to write modified MIDI files (if omitted, overwrites originals)')
    add_model_arguments(workflow_parser)

    args = parser.parse_args()

    # Fail early, before any LLM call, when a writing command has no MIDI directory
    if args.command in ('write', 'workflow') and not args.midi_dir:
        parser.error(f"--midi-dir is required for the '{args.command}' command")

    if args.command == 'extract':
        # Create and run the extractor
        logger.info("Extracting metadata from MIDI files...")
        extractor = MidiMetadataExtractor(model_type=args.model_type, model_name=args.model_name)
        extractor.process_midi_metadata(args.input_json, args.output_json)

    elif args.command == 'write':
        # Create and run the writer
        logger.info("Writing metadata to MIDI files...")
        writer = MidiMetadataWriter()
        writer.process_files(args.metadata_json, args.midi_dir, args.output_dir)

    elif args.command == 'workflow':
        # Run the full workflow
        logger.info("Running full metadata workflow...")

        # Extract metadata
        logger.info("Step 1: Extracting metadata...")
        extractor = MidiMetadataExtractor(model_type=args.model_type, model_name=args.model_name)
        extractor.process_midi_metadata(args.input_json, args.metadata_json)

        # Write metadata back to files
        logger.info("Step 2: Writing metadata to MIDI files...")
        writer = MidiMetadataWriter()
        writer.process_files(args.metadata_json, args.midi_dir, args.output_dir)

        logger.info("Workflow completed successfully!")

    else:
        parser.print_help()
class MidiMetadataReader:
    """Collect the textual meta events of MIDI files as "<type>: <value>" strings."""

    def __init__(self):
        """Initialize the MIDI metadata reader."""
        pass

    def extract_metadata_tags(self, midi_path: str) -> List[str]:
        """
        Extract metadata tags from a MIDI file.

        Args:
            midi_path: Path to the MIDI file

        Returns:
            List of "<message type>: <value>" strings, ['No metadata found']
            when the file has no usable meta messages, or a single
            "Error: ..." entry when the file cannot be read
        """
        metadata_tags = []

        try:
            midi = MidiFile(midi_path)

            for track in midi.tracks:
                for msg in track:
                    if not msg.is_meta:
                        continue

                    if msg.type in ('track_name', 'instrument_name'):
                        # These message types carry their payload in `name`
                        text = clean_text(msg.name)
                        if text:
                            metadata_tags.append(f"{msg.type}: {text}")
                    elif msg.type in ('copyright', 'text', 'lyrics', 'marker', 'cue_point'):
                        text = clean_text(msg.text)
                        if text:
                            metadata_tags.append(f"{msg.type}: {text}")
                    # Equality, not `in`: `in` against a string is a substring test
                    elif msg.type == 'time_signature':
                        metadata_tags.append(f"{msg.type}: {msg.numerator}")
                    elif msg.type == 'key_signature':
                        metadata_tags.append(f"{msg.type}: {msg.key}")

            # If we didn't find any metadata, add a note
            if not metadata_tags:
                metadata_tags.append('No metadata found')

            return metadata_tags

        except Exception as e:
            # Deliberate best effort: one unreadable file must not stop a batch
            logger.error(f"Error extracting metadata from {midi_path}: {e}")
            return [f"Error: {str(e)}"]

    def process_files(self, midi_files: List[str], output_file: str = None) -> Dict[str, List[str]]:
        """
        Process multiple MIDI files and extract their metadata.

        Args:
            midi_files: List of paths to MIDI files (str or os.PathLike)
            output_file: Optional path to write the output JSON

        Returns:
            Dictionary with MIDI file paths (always str) as keys and metadata
            tag lists as values; paths that do not exist are skipped
        """
        results = {}

        for midi_path in midi_files:
            # Normalize Path objects to str: the path becomes a dict key and
            # json.dump rejects non-string keys such as pathlib.Path
            midi_path = os.fspath(midi_path)

            if not os.path.exists(midi_path):
                logger.warning(f"File not found: {midi_path}")
                continue

            logger.info(f"Processing {midi_path}")

            # Use the relative path as key; this preserves directory
            # structure like "gs/11daes.mid"
            rel_path = midi_path
            if os.path.isabs(midi_path):
                try:
                    rel_path = os.path.relpath(midi_path)
                except ValueError:
                    # Keep as is if we can't make it relative
                    pass

            results[rel_path] = self.extract_metadata_tags(midi_path)

        # Write to output file if specified
        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2)
            logger.info(f"Results written to {output_file}")

        return results

    # def print_raw_midi_info(self, midi_path: str):
    #     """
    #     Print raw MIDI message information for debugging purposes.
    #
    #     Args:
    #         midi_path: Path to the MIDI file
    #     """
    #     try:
    #         midi = MidiFile(midi_path)
    #         print(f"\nRaw MIDI information for {midi_path}:")
    #
    #         for i, track in enumerate(midi.tracks):
    #             print(f"\nTrack {i}:")
    #             for msg in track:
    #                 if msg.is_meta:
    #                     print(f"  Meta: {msg}")
    #     except Exception as e:
    #         print(f"Error reading MIDI file: {e}")
def clean_text(text):
    """
    Reduce *text* to printable ASCII and trim surrounding whitespace.

    Characters outside the range 0x20-0x7E are dropped. Falsy input
    (None or an empty string) yields an empty string.
    """
    if text:
        # Keep only the runs of printable ASCII and stitch them back together
        printable_runs = re.findall(r'[\x20-\x7E]+', text)
        return ''.join(printable_runs).strip()
    return ''
class MidiMetadataWriter:
    """Write extracted metadata back into MIDI files as meta messages."""

    def __init__(self):
        """Initialize the MIDI metadata writer."""
        pass

    def _create_text_meta_message(self, msg_type: str, text: str) -> "MetaMessage":
        """
        Create a MIDI meta message with the specified type and text.

        Args:
            msg_type: The type of meta message ('text', 'copyright', etc.)
            text: The text content for the message

        Returns:
            A MIDI meta message at delta time 0
        """
        # track_name / instrument_name carry their payload in `name`, not `text`
        if msg_type in ('track_name', 'instrument_name'):
            return MetaMessage(msg_type, name=text, time=0)
        return MetaMessage(msg_type, text=text, time=0)

    @staticmethod
    def _field_value(metadata, field):
        """Return the usable string for *field*, or None if absent, empty or "Unknown"."""
        if not isinstance(metadata, dict):
            return None
        entry = metadata.get(field)
        if not isinstance(entry, dict):
            return None
        # Two schemas are in use: {"name": ...} and the extractor's {"text": ...}
        value = entry.get("name") or entry.get("text")
        if not value or value == "Unknown":
            return None
        return str(value)

    def write_metadata_to_file(self, midi_path: str, metadata: Dict[str, Any], output_path: str = None) -> None:
        """
        Write metadata to a MIDI file.

        Title, copyright, artist and comment (when present and not
        "Unknown") are inserted as meta messages at the start of the first
        track; a track is created if the file has none.

        Args:
            midi_path: Path to the original MIDI file
            metadata: Dictionary containing metadata to write
            output_path: Path to write the modified MIDI file (if None, overwrites original)

        Raises:
            Exception: Any read/write error is logged and re-raised unchanged
        """
        if output_path is None:
            output_path = midi_path

        try:
            # Load the original MIDI file
            midi = MidiFile(midi_path)

            # Create new meta messages based on the metadata
            meta_messages = []

            title = self._field_value(metadata, "title")
            if title:
                meta_messages.append(self._create_text_meta_message('track_name', title))

            copyright_text = self._field_value(metadata, "copyright")
            if copyright_text:
                meta_messages.append(self._create_text_meta_message('copyright', copyright_text))

            artist = self._field_value(metadata, "artist")
            if artist:
                meta_messages.append(self._create_text_meta_message('text', f"Artist: {artist}"))

            comment = self._field_value(metadata, "comment")
            if comment:
                meta_messages.append(self._create_text_meta_message('text', comment))

            if len(midi.tracks) > 0:
                # Put our metadata first, then every message of the original track
                new_track = MidiTrack()
                new_track.extend(meta_messages)
                new_track.extend(midi.tracks[0])
                midi.tracks[0] = new_track
            else:
                # If there are no tracks, create a new one with our metadata
                track = MidiTrack()
                track.extend(meta_messages)
                midi.tracks.append(track)

            # Save the modified MIDI file. This call had been commented out,
            # so success was logged without anything being written.
            midi.save(output_path)
            logger.info(f"Successfully wrote metadata to {output_path}")

        except Exception as e:
            logger.error(f"Error writing metadata to {output_path}: {e}")
            raise

    def process_files(self, metadata_json_path: str, midi_dir: str, output_dir: str = None) -> None:
        """
        Process all MIDI files based on metadata from a JSON file.

        Args:
            metadata_json_path: Path to the JSON file containing metadata
            midi_dir: Directory containing the original MIDI files
            output_dir: Directory to write modified MIDI files (if None, overwrites originals)

        Raises:
            Exception: Any error is logged and re-raised unchanged
        """
        # Local import: this module has no file-level `import os`
        import os

        try:
            # Load the metadata JSON
            with open(metadata_json_path, 'r', encoding='utf-8') as f:
                metadata_dict = json.load(f)

            # Process each file in the metadata dictionary
            for filename, metadata in metadata_dict.items():
                midi_path = os.path.join(midi_dir, filename)

                if output_dir:
                    # An absolute key would make join() ignore output_dir and
                    # point back at the original file; keep only its base name
                    relative_name = os.path.basename(filename) if os.path.isabs(filename) else filename
                    output_path = os.path.join(output_dir, relative_name)
                    # Keys may contain sub-directories (e.g. "gs/song.mid")
                    os.makedirs(os.path.dirname(output_path), exist_ok=True)
                else:
                    output_path = midi_path

                logger.info(f"Processing {filename}")
                self.write_metadata_to_file(midi_path, metadata, output_path)

        except Exception as e:
            logger.error(f"Error processing files: {e}")
            raise
#!/usr/bin/env bash
# Copies every MIDI file listed in METADATA_JSON into OUTPUT_DIR and prints
# (does not run) the exiftool command that would tag each copy.
# `pipefail` and process substitution require bash, hence the shebang.
set -xeuo pipefail

## For a single file:
#echo exiftool -Title="Song Title" -Artist="Artist Name" -Copyright="Copyright Info" -Comment="Interesting comment" input.mid

OUTPUT_DIR=~/tmp/out
# -p: a rerun must not abort under `set -e` because the directory already exists
mkdir -p "${OUTPUT_DIR}"

METADATA_JSON=~/tmp/25041621-gsmid-meta-head-jq-out.json

# Read one key per line so file names containing spaces stay intact, and pass
# the name to jq with --arg instead of splicing it into the jq program.
while IFS= read -r file; do
    title=$(jq -r --arg f "$file" '.[$f].title.name' "${METADATA_JSON}")
    artist=$(jq -r --arg f "$file" '.[$f].artist.name' "${METADATA_JSON}")
    copyright=$(jq -r --arg f "$file" '.[$f].copyright.text' "${METADATA_JSON}")
    comment=$(jq -r --arg f "$file" '.[$f].comment.text' "${METADATA_JSON}")

    cp -- "$file" "${OUTPUT_DIR}"
    OUTPUT_FILENAME=${file##*/}
    OUTPUT_FILEPATH="${OUTPUT_DIR}/${OUTPUT_FILENAME}"
    # Dry run: the command is echoed, not executed
    echo exiftool -overwrite_original -Title="$title" -Artist="$artist" -Copyright="$copyright" -Comment="$comment" "${OUTPUT_FILEPATH}"
    ls -l "${OUTPUT_FILEPATH}"
done < <(jq -r 'keys[]' "${METADATA_JSON}")