diff --git a/README.md b/README.md index 73f323ff..e79ec4bb 100644 --- a/README.md +++ b/README.md @@ -23,13 +23,20 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo - Family name: Last name of an author - Email: email of author - URL: website or ORCID associated with the author +- **Application type**: type of software (command line application, notebook, ontology, scientific workflow, etc.) - **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc. -- **Citation**: Preferred citation as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). We aim to recognize the following properties: +- **Citation**: Preferred citation as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). +For CITATION.cff files, SOMEF now generates two separate entries: one for the software and another for the preferred citation (is_preferred_citation: True). This ensures metadata like DOI or version is correctly assigned to each entity. 
+We aim to recognize the following properties: - Title: Title of the publication - Author: list of author names in the publication - URL: URL of the publication - DOI: Digital object identifier of the publication - Date published + - Version: Software version (if applicable) + - Journal: Journal name where the paper was published + - Year: Year of publication + - Pages: Page range in the journal - **Code of conduct**: Link to the code of conduct of the project - **Code repository**: Link to the GitHub/GitLab repository used for the extraction - **Contact**: Contact person responsible for maintaining a software component @@ -48,13 +55,14 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo - **Forks url**: Links to forks made of the project - **Full name**: Name + owner (owner/name) - **Full title**: If the repository is a short name, we will attempt to extract the longer version of the repository name +- **Funding**: Funding information associated with the project. **Note**: Currently, this information is only extracted from existing `codemeta.json` files within the repository. - **Identifier**: Identifier associated with the software (if any), such as Digital Object Identifiers and Software Heritage identifiers (SWH). DOIs associated with publications will also be detected. - **Images**: Images used to illustrate the software component - **Installation instructions**: A set of instructions that indicate how to install a target repository - **Invocation**: Execution command(s) needed to run a scientific software component - **Issue tracker**: Link where to open issues for the target repository - **Keywords**: set of terms used to commonly identify a software component -- **License**: License and usage terms of a software component +- **License**: License and usage terms of a software component. SOMEF also extracts license information from `CITATION.cff` files.
- **Logo**: Main logo used to represent the target software component - **Maintainer**: Individuals or teams responsible for maintaining the software component, extracted from the CODEOWNERS file - **Name**: Name identifying a software component @@ -77,12 +85,11 @@ Given a readme file (or a GitHub/Gitlab repository) SOMEF will extract the follo - **Repository status**: Repository status as it is described in [repostatus.org](https://www.repostatus.org/). - **Requirements**: Pre-requisites and dependencies needed to execute a software component - **Run**: Running instructions of a software component. It may be wider than the `invocation` category, as it may include several steps and explanations. -- **Runtime platform**: specifies runtime platform or script interpreter dependencies required to run the project.. +- **Runtime platform**: specifies the runtime environment or script interpreter dependencies (e.g., Python, Java). - **Script files**: Bash script files contained in the repository - **Stargazers count**: Total number of stargazers of the project - **Support**: Guidelines and links of where to obtain support for a software component - **Support channels**: Help channels one can use to get support about the target software component -- **Type**: type of software (command line application, notebook, ontology, scientific workflow, etc.) 
- **Usage examples**: Assumptions and considerations recorded by the authors when executing a software component, or examples on how to use it - **Workflows**: URL and path to the computational workflow files present in the repository diff --git a/docs/codemetajson.md b/docs/codemetajson.md index 8a702278..aa666bc9 100644 --- a/docs/codemetajson.md +++ b/docs/codemetajson.md @@ -28,8 +28,9 @@ These fields are defined in the [Codemeta specification](https://github.com/code | development_status | development_status[i].result.value | developmentStatus | | download_url | download_url[i].result.value | downloadUrl | | has_package_file | has_package_file[i].result.value | URL of the codemeta.json file | -| funding - funder | funding[i].result.funder | funding.funder or funding.funder.name | -| funding - funding | funding[i].result.funding | String.fundingIdentifier | +| funding - funder | funding[i].result.funder | funder.@id or funder.name *(1)*| +| funding - funding | funding[i].result.funding | funding *(1)*| +| funding - value | funding[i].result.value | funding string or funder.name *(1)*| | identifier | identifier[i].result.value | identifier | | issue_tracker | issue_tracker[i].result.value | issueTracker | | keywords | keywords[i].result.value | keywords | @@ -49,4 +50,41 @@ These fields are defined in the [Codemeta specification](https://github.com/code | version | version[i].result.value | softwareVersion or version | - \ No newline at end of file +--- + +*(1)* + +- SOMEF json result: + +``` +"funding": [ + { + "result": { + "value": "1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software", + "type": "String", + "funder": { + "@id": "https://doi.org/10.13039/100000001", + "@type": "Organization", + "name": "National Science Foundation" + }, + "funding": "1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software" + }, + "confidence": 1, + "technique": "code_parser", + "source": "https://raw.githubusercontent.com/.../codemeta.json" + 
} +] +``` + +- CODEMETA output: +``` +"funder": { + "@id": "https://doi.org/10.13039/100000001", + "@type": "Organization", + "name": "National Science Foundation" + }, +"funding": "1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software", +``` + + + diff --git a/docs/condaenvironment.md b/docs/condaenvironment.md index c5f638e2..e08f96b1 100644 --- a/docs/condaenvironment.md +++ b/docs/condaenvironment.md @@ -35,7 +35,7 @@ dependencies: "value": "python=3.8.5", "name": "python", "version": "3.8.5", - "type": "Software_application", + "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "conda" }, @@ -43,7 +43,7 @@ dependencies: "value": "albumentations==0.4.3", "name": "albumentations", "version": "0.4.3", - "type": "Software_application", + "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "pip" }, diff --git a/docs/gemspec.md b/docs/gemspec.md index 041145d1..b2a23023 100644 --- a/docs/gemspec.md +++ b/docs/gemspec.md @@ -79,7 +79,7 @@ spec.requirements = [ Result: add_depency -> type runtime; add_development_dependencyd -> type dev ``` - [{'result': {'value': 'railties: >= 3.0', 'name': 'railties', 'version': '>= 3.0', 'type': 'Software_application', 'dependency_type': 'runtime', 'dependency_resolver': 'bundler'}, 'confidence': 1, 'technique': 'code_parser', 'source': 'https://example.org/bootstrap-datepicker-rails.gemspec'}, {'result': {'value': 'bundler: >= 1.0', 'name': 'bundler', 'version': '>= 1.0', 'type': 'Software_application', 'dependency_type': 'dev','dependency_resolver': 'bundler'}, 'confidence': 1, 'technique': 'code_parser', 'source': 'https://example.org/bootstrap-datepicker-rails.gemspec'}] + [{'result': {'value': 'railties: >= 3.0', 'name': 'railties', 'version': '>= 3.0', 'type': 'SoftwareDependency', 'dependency_type': 'runtime', 'dependency_resolver': 'bundler'}, 'confidence': 1, 'technique': 'code_parser', 'source': 
'https://example.org/bootstrap-datepicker-rails.gemspec'}, {'result': {'value': 'bundler: >= 1.0', 'name': 'bundler', 'version': '>= 1.0', 'type': 'SoftwareDependency', 'dependency_type': 'dev','dependency_resolver': 'bundler'}, 'confidence': 1, 'technique': 'code_parser', 'source': 'https://example.org/bootstrap-datepicker-rails.gemspec'}] ``` diff --git a/docs/index.md b/docs/index.md index ac13d86e..74db03ab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,6 +15,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - **Acknowledgement**: Text acknowledging funding sources or contributors - **Application domain**: The application domain of the repository. This may be related to the research area of a software component (e.g., Astrophysics) or the general domain/functionality of the tool (i.e., machine learning projects)[1](#myfootnote1) +- **Application type**: type of software (command line application, notebook, ontology, scientific workflow, etc.) - **Assets**: files attached to the release - url: URL of the publication of the file - name: name of the file @@ -31,12 +32,18 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - URL: website or ORCID associated with the author - Affiliation: name of organization or affiliation - **Build file**: Build file(s) of the project. For example, files used to create a Docker image for the target software, package files, etc. -- **Citation**: Preferred citation as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). We aim to recognize the following properties: +- **Citation**: Preferred citation(s) as the authors have stated in their readme file. SOMEF recognizes Bibtex, Citation File Format files and other means by which authors cite their papers (e.g., by in-text citation). 
+For CITATION.cff files, SOMEF now generates two separate entries: one for the software and another for the preferred citation (is_preferred_citation: True). This ensures metadata like DOI or version is correctly assigned to each entity. +We aim to recognize the following properties: - Title: Title of the publication - Author: list of author names in the publication - URL: URL of the publication - DOI: Digital object identifier of the publication - - Date published: + - Date published + - Version: Software version (if applicable, i.e., the main citation is a software deposit) + - Journal: Journal name where the paper was published + - Year: Year of publication + - Pages: Page range in the journal - **Code of conduct**: Link to the code of conduct of the project - **Code repository**: Link to the GitHub/GitLab repository used for the extraction - **Contact**: Contact person responsible for maintaining a software component @@ -55,6 +62,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - **Forks url**: Links to forks made of the project - **Full name**: Name + owner (owner/name) - **Full title**: If the repository is a short name, we will attempt to extract the longer version of the repository name +- **Funding**: Funding information associated with the project. **Note**: Currently, this information is only extracted from existing `codemeta.json` files within the repository. - **Homepage**: URL of the item. - **Identifier**: Identifier associated with the software (if any), such as Digital Object Identifiers and Software Heritage identifiers (SWH). DOIs associated with publications will also be detected. 
- **Images**: Images used to illustrate the software component @@ -62,7 +70,7 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - **Invocation**: Execution command(s) needed to run a scientific software component - **Issue tracker**: Link where to open issues for the target repository - **Keywords**: set of terms used to commonly identify a software component -- **License**: License and usage terms of a software component +- **License**: License and usage terms of a software component. - **Logo**: Main logo used to represent the target software component - **Name**: Name identifying a software component - **Ontologies**: URL and path to the ontology files present in the repository @@ -85,12 +93,11 @@ Given a readme file (or a GitHub repository) SOMEF will extract the following ca - **Repository status**: Repository status as it is described in [repostatus.org](https://www.repostatus.org/). - **Requirements**: Pre-requisites and dependencies needed to execute a software component - **Run**: Running instructions of a software component. It may be wider than the `invocation` category, as it may include several steps and explanations. -- **Runtime platform**: specifies runtime platform or script interpreter dependencies required to run the project. +- **Runtime platform**: specifies the runtime environment or script interpreter dependencies (e.g., Python, Java). - **Script files**: Bash script files contained in the repository - **Stargazers count**: Total number of stargazers of the project - **Support**: Guidelines and links of where to obtain support for a software component - **Support channels**: Help channels one can use to get support about the target software component -- **Type**: type of software (command line application, notebook, ontology, scientific workflow, etc.) 
- **Usage examples**: Assumptions and considerations recorded by the authors when executing a software component, or examples on how to use it - **Workflows**: URL and path to the computational workflow files present in the repository diff --git a/docs/output.md b/docs/output.md index 30831499..43e2be94 100644 --- a/docs/output.md +++ b/docs/output.md @@ -67,8 +67,9 @@ SOMEF aims to recognize the following categories (in alphabetical order): - `acknowledgement`: Any text that the authors have prepared to acknnowledge the contribution from others, or project funding. - `application_domain`: The application domain of the repository. This may be related to the research area of a software component (e.g., Astrophysics) or the general domain/functionality of the tool (i.e., machine learning projects). See all current recognized application domains [here](https://somef.readthedocs.io/en/latest/#myfootnote1). +- `application_type`: Software type: Commandline Application, Notebook Application, Ontology, Scientific Workflow. Non-Software types: Static Website, Uncategorized - `authors`: Person or organization responsible of the project. This property is also used to indicate the responsible entities of a publication associated with the code repository. -- `citation`: Software citation (usually in `.bib` form) as the authors have stated in their readme file, or through a `CFF` file. +- `citation`: Software citation (usually in .bib or .cff format). SOMEF extracts and structures the metadata from these files (including authors, titles, and DOIs) instead of just returning a raw string. - `code_of_conduct`: Link to the code of conduct file of the project - `code_repository`: Link to the source code (typically the repository where the readme can be found) - `contact`: Contact person responsible for maintaining a software component. 
@@ -88,7 +89,9 @@ SOMEF aims to recognize the following categories (in alphabetical order): - `forks_url`: Links to forks made of the project (GitHub only) - `full_name`: Name + owner (owner/name) (if available) - `full_title`: If the repository has a short name, we will attempt to extract the longer version of the repository name. For example, a repository may be called "Widoco", but the longer title is "Wizard for documenting ontologies". +- `funding`: Funding code for the related project. Currently, this information is only extracted from existing `codemeta.json` files within the repository. - `has_build_file`: Build file to create a Docker image for the target software +- `has_package_file`: Specifies what package file is present in the code repository. - `has_script_file`: Snippets of code contained in the repository. - `homepage`: URL of the item. - `identifier`: Identifiers detected within a repository (e.g., Digital Object Identifier). @@ -105,24 +108,22 @@ SOMEF aims to recognize the following categories (in alphabetical order): - `owner`: Name of the user or organization in charge of the repository - `package_distribution`: Link to official package repositories where the software can be downloaded from (e.g., `pypi`). - `package_file`: Link to a package file used in the repository (e.g., `pyproject.toml`, `setup.py`). +- `package_id`: Identifier extracted from packages. (e.g., `packages.json`) - `programming_languages`: Languages used in the repository. - `readme_url`: URL to the main README file in the repository. +- `reference_publication`: URL to the paper associated with the code repository. - `related_papers`: URL to possible related papers within the repository stated within the readme file. - `releases`: Pointer to the available versions of a software component. - `repository_status`: Repository status as it is described in [repostatus.org](https://www.repostatus.org/). 
- `requirements`: Pre-requisites and dependencies needed to execute a software component. - `run`: Running instructions of a software component. It may be wider than the `invocation` category, as it may include several steps and explanations. +- `runtime_platform`: Specifies the runtime environment or script interpreter dependencies required to run the project (e.g., Python, Java, Julia). - `stargazers_count`: Total number of stargazers of the project. - `support`: Guidelines and links of where to obtain support for a software component. - `support_channels`: Help channels one can use to get support about the target software component. -- `type`: Software type: Commandline Application, Notebook Application, Ontology, Scientific Workflow. Non-Software types: Static Website, Uncategorized - `usage`: Usage examples and considerations of a code repository. - `workflows`: URL and path to the computational workflow files present in the repository. -- `homepage`: URL to the homepage of the software or organization. -- `reference_publication`: URL to the paper associated with the code repository. -- `package_id`: Identifier extracted from packages. (e.g., `packages.json`) -- `funding`: Funding code for the related project. -- `has_package_file`: Specifies what package file is present in the code repository. + The following table summarized the properties used to describe a `category`: @@ -171,9 +172,10 @@ The following object `types` are currently supported: - `Programming_language`: Programming language used in the repository. - `License`: object representing all the metadata SOMEF extracts from a license. - `Agent`: user (typically, a person) or organization responsible for authoring a software release or a paper. -- `Publication`: Scientific paper associated with the code repository. -- `SoftwareApplication`: Class to represent software dependencies between projects. 
-- `Runtime_platform`: specifies runtime platform or script interpreter dependencies required to run the project.. +- `ScholarlyArticle`: Scientific paper or article associated with the code repository. +- `SoftwareApplication`: Class to represent the main software component metadata. +- `SoftwareDependency`: Class to represent software dependencies and runtime platforms required to run the project. + The following literal types are currently supported: - `Number`: A numerical value. We do not distinguish between integer, long or float. @@ -184,49 +186,24 @@ The following literal types are currently supported: - `Url`: uniform resource locator of a file. - - - -The tables below summarizes all types and their corresponding properties- - -An AGENT has the following properties: +An Agent has the following properties: | Property | Expected value | Definition | |---|---|---| +| **affiliation** | String | name of organization or affiliation | | **email** | String | Email of an author | | **family_name** | String | Last name of an author | | **given_name** | String | First name of an author | +| **identifier** | String | id of an agent | | **name** | String | Name used to designate the person or organization| +| **role** | String | The role of the agent in the development or maintenance of this software component | | **url** | Url | Uniform resource locator of the resource | -| **affiliation** | String | name of organization or affiliation | -| **identifier** | String | id of an agent | -| **role** | String | role of agent | -An ASSET has the following properties: + +An Asset has the following properties: | Property | Expected value | Definition | |---|---|---| @@ -239,17 +216,17 @@ An ASSET has the following properties: | **url** | Url | Uniform resource locator of the resource | - -A LICENSE has the following properties: +A License has the following properties: | Property | Expected value | Definition | |---|---|---| +| **identifier** | String | id of licence | | 
**name** | String | Title or name of the license | | **spdx_id** | String | Spdx id corresponding to this license | | **url** | Url | Uniform resource locator of the license | -| **identifier** | String | id of licence | -A PROGRAMMING_LANGUAGE has the following properties: + +A Programming_language has the following properties: | Property | Expected value | Definition | |---|---|---| @@ -257,17 +234,7 @@ A PROGRAMMING_LANGUAGE has the following properties: | **size** | Integer | File size content (bytes) of a code repository using a given programming language | -A PUBLICATION has the following properties: - -| Property | Expected value | Definition | -|---|---|---| -| **author** | Agent, Organization | Person or organization responsible for creating an article or a software release. | -| **doi** | Url | When a publication is detected, but the format is in bibtek or CFF, SOMEF will add a `doi` field with the detected DOI value. The result includes a full URL. | -| **title** | String | Title of the publication | -| **url** | Url | Uniform resource locator of the resource | - - -A RELEASE has the following properties: +A Release has the following properties: | Property | Expected value | Definition | |---|---|---| @@ -285,45 +252,35 @@ A RELEASE has the following properties: | **zipball_url** | Url | URL to the zip file where to download a software release | - A REQUIREMENT has the following properties: - -| Property | Expected value | Definition | -|---|---|---| -| **name** | String | Name of the requeriment | -| **version** | String | named version of a requeriment | -| **dependency_type** | String | type: dev, runtime... 
Indicates whether the dependency is required at runtime or only for development/testing | -| **dependency_resolver** | String | Identifies the ecosystem or package manager that resolves the dependency (e.g., npm, bower, pip, python, poetry, pdm, cargo, julia, maven, publicode).| - -A RUNTIME_PLATFORM has the following properties: - -| Property | Expected value | Definition | -|---|---|---| -| **name** | String | Name of the runtime platform (e.g., Java) | - **version** | String | version of the runtime platform | -| **value** | String | name and version of the runtime platform | - - -A SCHOLARLY_ARTICLE has the following properties: +A ScholarlyArticle has the following properties: | Property | Expected value | Definition | |---|---|---| +| **authors** | List of Agent| List of authors responsible for the publication, providing structured metadata for each | +| **date_published** | String | Date when the article or citation was officially published. | +| **doi** | String | Digital Object Identifier (DOI) of the reference, usually returned as a full URL.| +| **journal** | String | Journal where the publication appeared | +| **pages** | String | Page range of the publication | | **title** | String | Title of reference or citation | -| **value** | String | Title of reference or citation | | **url** | String | Link to reference or citation | -| **date_published** | String | date of publication reference or citation | -| **doi** | String | Identifier of reference| +| **value** | String | Title of reference or citation | +| **year** | Number | Year of publication | -A SOFTWARE_APPLICATION has the following properties: +A SoftwareApplication or SoftwareDependency has the following properties: | Property | Expected value | Definition | |---|---|---| -| **name** | String | Name of the software | -| **value** | String | Name and version of the software | -| **version** | String | version of software | -| **development_type** | String | runtime or dev | +| **dependency_type** 
| String | Indicates whether the dependency is required at runtime or only for development/testing (e.g., `dev`, `runtime`, `os`). | +| **dependency_resolver** | String | Identifies the ecosystem or package manager that resolves the dependency (e.g., `npm`, `pip`, `julia`, `conda`).| +| **is_preferred_citation** | Boolean | Set to `True` if the authors explicitly state this is the preferred citation. Omitted otherwise. | +| **name** | String | Name of the software, dependency, or runtime platform (e.g., "pandas", "python"). | +| **type** | String | The object type: `SoftwareApplication` (for the main repository) or `SoftwareDependency` (for requirements and platforms). | +| **value** | String | A string representation typically combining name and version. | +| **version** | String | The version or version range of the software/dependency. | + -A TEXT_EXCERPT has the following properties: +A Text_excerpt has the following properties: | Property | Expected value | Definition | |---|---|---| diff --git a/docs/packagejson.md b/docs/packagejson.md index da5fcc10..42b868f6 100644 --- a/docs/packagejson.md +++ b/docs/packagejson.md @@ -80,7 +80,7 @@ or ... 
``` Resutl: -```{'result': {'value': 'foo@1.0.0 - 2.9999.9999', 'name': 'foo', 'version': '1.0.0 - 2.9999.9999', 'type': 'Software_application'}, 'confidence': 1, 'technique': 'code_parser', 'source': 'http://example.com/package_neors.json'}``` +```{'result': {'value': 'foo@1.0.0 - 2.9999.9999', 'name': 'foo', 'version': '1.0.0 - 2.9999.9999', 'type': 'SoftwareDependency'}, 'confidence': 1, 'technique': 'code_parser', 'source': 'http://example.com/package_neors.json'}``` *(5)* - Example: diff --git a/docs/pom.md b/docs/pom.md index b03dcda7..832f8210 100644 --- a/docs/pom.md +++ b/docs/pom.md @@ -80,7 +80,7 @@ package_distribution': [{'result': {'value': 'http://127.0.0.1/websvn/my-project {'value': 'org.apache.maven.maven-model', 'name': 'maven-model', 'version': '3.9.0', - 'type': 'Software_application'}, + 'type': 'SoftwareDependency'}, ``` diff --git a/docs/publiccode.md b/docs/publiccode.md index 3dccbc65..f938a699 100644 --- a/docs/publiccode.md +++ b/docs/publiccode.md @@ -127,7 +127,7 @@ dependsOn: "value": "PostgreSQL>=14.0", "name": "PostgreSQL", "version": ">=14.0", - "type": "Software_application" + "type": "SoftwareDependency" }, ``` @@ -136,7 +136,7 @@ dependsOn: "value": "PostgreSQL>=14.0", "name": "PostgreSQL", "version": ">=14.0", - "type": "Software_application", + "type": "SoftwareDependency", "dependency_type": "runtime", "dependency_resolver": "pucliccode" }, diff --git a/docs/supported_metadata_files.md b/docs/supported_metadata_files.md index 3d74bc73..a035b2ae 100644 --- a/docs/supported_metadata_files.md +++ b/docs/supported_metadata_files.md @@ -43,7 +43,7 @@ SOMEF can extract metadata from a wide range of files commonly found in software | Keywords | keywords | | License | license | | Release | version | -| Software_application | requirements | +| SoftwareDependency | requirements | | String | description | | String | name | | String | package_id | @@ -86,7 +86,7 @@ The following Python code snippet show the logic used by the SOMEF parser 
to tra "value": f'{dependency.get("groupId", "")}.{dependency.get("artifactId", "")}'.strip("."), "name": dependency.get("artifactId", ""), "version": dependency.get("version", ""), - "type": constants.SOFTWARE_APPLICATION + "type": constants.SOFTWARE_DEPENDENCY }, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, @@ -105,7 +105,7 @@ After applying the mapping logic, the metadata for the dependency is stored unde "value": "org.apache.maven.maven-model", "name": "maven-model", "version": "3.9.0", - "type": "Software_application" + "type": "SoftwareDependency" }, "confidence": 1, "technique": "code_parser", diff --git a/poetry.lock b/poetry.lock index be43bff3..1f462f70 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1427,103 +1427,103 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pillow" -version = "12.1.1" +version = "12.2.0" description = "Python Imaging Library (fork)" optional = false python-versions = ">=3.10" groups = ["main"] files = [ - {file = "pillow-12.1.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f1625b72740fdda5d77b4def688eb8fd6490975d06b909fd19f13f391e077e0"}, - {file = "pillow-12.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:178aa072084bd88ec759052feca8e56cbb14a60b39322b99a049e58090479713"}, - {file = "pillow-12.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b66e95d05ba806247aaa1561f080abc7975daf715c30780ff92a20e4ec546e1b"}, - {file = "pillow-12.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89c7e895002bbe49cdc5426150377cbbc04767d7547ed145473f496dfa40408b"}, - {file = "pillow-12.1.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a5cbdcddad0af3da87cb16b60d23648bc3b51967eb07223e9fed77a82b457c4"}, - {file = "pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4"}, - {file = "pillow-12.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:99c1506ea77c11531d75e3a412832a13a71c7ebc8192ab9e4b2e355555920e3e"}, - {file = "pillow-12.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:36341d06738a9f66c8287cf8b876d24b18db9bd8740fa0672c74e259ad408cff"}, - {file = "pillow-12.1.1-cp310-cp310-win32.whl", hash = "sha256:6c52f062424c523d6c4db85518774cc3d50f5539dd6eed32b8f6229b26f24d40"}, - {file = "pillow-12.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6008de247150668a705a6338156efb92334113421ceecf7438a12c9a12dab23"}, - {file = "pillow-12.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:1a9b0ee305220b392e1124a764ee4265bd063e54a751a6b62eff69992f457fa9"}, - {file = "pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32"}, - {file = "pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38"}, - {file = "pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5"}, - {file = "pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090"}, - {file = "pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af"}, - {file = "pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b"}, - {file = "pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5"}, - {file = "pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d"}, - {file = "pillow-12.1.1-cp311-cp311-win32.whl", hash = 
"sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c"}, - {file = "pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563"}, - {file = "pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80"}, - {file = "pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052"}, - {file = "pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984"}, - {file = "pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79"}, - {file = "pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293"}, - {file = "pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397"}, - {file = "pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0"}, - {file = "pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3"}, - {file = "pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35"}, - {file = "pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a"}, - {file = "pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6"}, - {file = "pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = 
"sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523"}, - {file = "pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e"}, - {file = "pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9"}, - {file = "pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6"}, - {file = "pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60"}, - {file = "pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2"}, - {file = "pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850"}, - {file = "pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289"}, - {file = "pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e"}, - {file = "pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717"}, - {file = "pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a"}, - {file = "pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029"}, - {file = "pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b"}, - {file = "pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1"}, - {file = "pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a"}, - {file = "pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da"}, - {file = "pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc"}, - {file = "pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c"}, - {file = "pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8"}, - {file = "pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20"}, - {file = "pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13"}, - {file = "pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf"}, - {file = "pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524"}, - {file = "pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986"}, - {file = "pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c"}, - {file = "pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3"}, - {file = "pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = 
"sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af"}, - {file = "pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f"}, - {file = "pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642"}, - {file = "pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd"}, - {file = "pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202"}, - {file = "pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f"}, - {file = "pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f"}, - {file = "pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f"}, - {file = "pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e"}, - {file = "pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0"}, - {file = "pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb"}, - {file = "pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f"}, - {file = "pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15"}, - {file = "pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = 
"sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f"}, - {file = "pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8"}, - {file = "pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9"}, - {file = "pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60"}, - {file = "pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7"}, - {file = "pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f"}, - {file = "pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586"}, - {file = "pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce"}, - {file = "pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8"}, - {file = "pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36"}, - {file = "pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b"}, - {file = "pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735"}, - {file = "pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e"}, - {file = "pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4"}, + {file = "pillow-12.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:a4e8f36e677d3336f35089648c8955c51c6d386a13cf6ee9c189c5f5bd713a9f"}, + {file = "pillow-12.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e589959f10d9824d39b350472b92f0ce3b443c0a3442ebf41c40cb8361c5b97"}, + {file = "pillow-12.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a52edc8bfff4429aaabdf4d9ee0daadbbf8562364f940937b941f87a4290f5ff"}, + {file = "pillow-12.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:975385f4776fafde056abb318f612ef6285b10a1f12b8570f3647ad0d74b48ec"}, + {file = "pillow-12.2.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd9c0c7a0c681a347b3194c500cb1e6ca9cab053ea4d82a5cf45b6b754560136"}, + {file = "pillow-12.2.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:88d387ff40b3ff7c274947ed3125dedf5262ec6919d83946753b5f3d7c67ea4c"}, + {file = "pillow-12.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:51c4167c34b0d8ba05b547a3bb23578d0ba17b80a5593f93bd8ecb123dd336a3"}, + {file = "pillow-12.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34c0d99ecccea270c04882cb3b86e7b57296079c9a4aff88cb3b33563d95afaa"}, + {file = "pillow-12.2.0-cp310-cp310-win32.whl", hash = "sha256:b85f66ae9eb53e860a873b858b789217ba505e5e405a24b85c0464822fe88032"}, + {file = "pillow-12.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:673aa32138f3e7531ccdbca7b3901dba9b70940a19ccecc6a37c77d5fdeb05b5"}, + {file = "pillow-12.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:3e080565d8d7c671db5802eedfb438e5565ffa40115216eabb8cd52d0ecce024"}, + {file = "pillow-12.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:8be29e59487a79f173507c30ddf57e733a357f67881430449bb32614075a40ab"}, + {file = "pillow-12.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:71cde9a1e1551df7d34a25462fc60325e8a11a82cc2e2f54578e5e9a1e153d65"}, + {file = "pillow-12.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f490f9368b6fc026f021db16d7ec2fbf7d89e2edb42e8ec09d2c60505f5729c7"}, + {file = "pillow-12.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8bd7903a5f2a4545f6fd5935c90058b89d30045568985a71c79f5fd6edf9b91e"}, + {file = "pillow-12.2.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3997232e10d2920a68d25191392e3a4487d8183039e1c74c2297f00ed1c50705"}, + {file = "pillow-12.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e74473c875d78b8e9d5da2a70f7099549f9eb37ded4e2f6a463e60125bccd176"}, + {file = "pillow-12.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:56a3f9c60a13133a98ecff6197af34d7824de9b7b38c3654861a725c970c197b"}, + {file = "pillow-12.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:90e6f81de50ad6b534cab6e5aef77ff6e37722b2f5d908686f4a5c9eba17a909"}, + {file = "pillow-12.2.0-cp311-cp311-win32.whl", hash = "sha256:8c984051042858021a54926eb597d6ee3012393ce9c181814115df4c60b9a808"}, + {file = "pillow-12.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e6b2a0c538fc200b38ff9eb6628228b77908c319a005815f2dde585a0664b60"}, + {file = "pillow-12.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:9a8a34cc89c67a65ea7437ce257cea81a9dad65b29805f3ecee8c8fe8ff25ffe"}, + {file = "pillow-12.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2d192a155bbcec180f8564f693e6fd9bccff5a7af9b32e2e4bf8c9c69dbad6b5"}, + {file = "pillow-12.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3f40b3c5a968281fd507d519e444c35f0ff171237f4fdde090dd60699458421"}, + {file = "pillow-12.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:03e7e372d5240cc23e9f07deca4d775c0817bffc641b01e9c3af208dbd300987"}, + {file = "pillow-12.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b86024e52a1b269467a802258c25521e6d742349d760728092e1bc2d135b4d76"}, + {file = "pillow-12.2.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7371b48c4fa448d20d2714c9a1f775a81155050d383333e0a6c15b1123dda005"}, + {file = "pillow-12.2.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62f5409336adb0663b7caa0da5c7d9e7bdbaae9ce761d34669420c2a801b2780"}, + {file = "pillow-12.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:01afa7cf67f74f09523699b4e88c73fb55c13346d212a59a2db1f86b0a63e8c5"}, + {file = "pillow-12.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc3d34d4a8fbec3e88a79b92e5465e0f9b842b628675850d860b8bd300b159f5"}, + {file = "pillow-12.2.0-cp312-cp312-win32.whl", hash = "sha256:58f62cc0f00fd29e64b29f4fd923ffdb3859c9f9e6105bfc37ba1d08994e8940"}, + {file = "pillow-12.2.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:7f84204dee22a783350679a0333981df803dac21a0190d706a50475e361c93f5"}, + {file = "pillow-12.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:af73337013e0b3b46f175e79492d96845b16126ddf79c438d7ea7ff27783a414"}, + {file = "pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c"}, + {file = "pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2"}, + {file = "pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c"}, + {file = "pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795"}, + {file = "pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f"}, + {file = "pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed"}, + {file = "pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9"}, + {file = "pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed"}, + {file = "pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3"}, + {file = "pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9"}, + {file = "pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795"}, + {file = "pillow-12.2.0-cp313-cp313-win32.whl", hash = 
"sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e"}, + {file = "pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b"}, + {file = "pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06"}, + {file = "pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b"}, + {file = "pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f"}, + {file = "pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612"}, + {file = "pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c"}, + {file = "pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea"}, + {file = "pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4"}, + {file = "pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4"}, + {file = "pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea"}, + {file = "pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24"}, + {file = "pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98"}, + {file = "pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = 
"sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453"}, + {file = "pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:3adc9215e8be0448ed6e814966ecf3d9952f0ea40eb14e89a102b87f450660d8"}, + {file = "pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:6a9adfc6d24b10f89588096364cc726174118c62130c817c2837c60cf08a392b"}, + {file = "pillow-12.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:6a6e67ea2e6feda684ed370f9a1c52e7a243631c025ba42149a2cc5934dec295"}, + {file = "pillow-12.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2bb4a8d594eacdfc59d9e5ad972aa8afdd48d584ffd5f13a937a664c3e7db0ed"}, + {file = "pillow-12.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80b2da48193b2f33ed0c32c38140f9d3186583ce7d516526d462645fd98660ae"}, + {file = "pillow-12.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22db17c68434de69d8ecfc2fe821569195c0c373b25cccb9cbdacf2c6e53c601"}, + {file = "pillow-12.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b14cc0106cd9aecda615dd6903840a058b4700fcb817687d0ee4fc8b6e389be"}, + {file = "pillow-12.2.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cbeb542b2ebc6fcdacabf8aca8c1a97c9b3ad3927d46b8723f9d4f033288a0f"}, + {file = "pillow-12.2.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bfd07bc812fbd20395212969e41931001fd59eb55a60658b0e5710872e95286"}, + {file = "pillow-12.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9aba9a17b623ef750a4d11b742cbafffeb48a869821252b30ee21b5e91392c50"}, + {file = "pillow-12.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:deede7c263feb25dba4e82ea23058a235dcc2fe1f6021025dc71f2b618e26104"}, + {file = "pillow-12.2.0-cp314-cp314-win32.whl", hash = "sha256:632ff19b2778e43162304d50da0181ce24ac5bb8180122cbe1bf4673428328c7"}, + {file = "pillow-12.2.0-cp314-cp314-win_amd64.whl", hash = 
"sha256:4e6c62e9d237e9b65fac06857d511e90d8461a32adcc1b9065ea0c0fa3a28150"}, + {file = "pillow-12.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:b1c1fbd8a5a1af3412a0810d060a78b5136ec0836c8a4ef9aa11807f2a22f4e1"}, + {file = "pillow-12.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:57850958fe9c751670e49b2cecf6294acc99e562531f4bd317fa5ddee2068463"}, + {file = "pillow-12.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5d38f1411c0ed9f97bcb49b7bd59b6b7c314e0e27420e34d99d844b9ce3b6f3"}, + {file = "pillow-12.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c0a9f29ca8e79f09de89293f82fc9b0270bb4af1d58bc98f540cc4aedf03166"}, + {file = "pillow-12.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1610dd6c61621ae1cf811bef44d77e149ce3f7b95afe66a4512f8c59f25d9ebe"}, + {file = "pillow-12.2.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a34329707af4f73cf1782a36cd2289c0368880654a2c11f027bcee9052d35dd"}, + {file = "pillow-12.2.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e9c4f5b3c546fa3458a29ab22646c1c6c787ea8f5ef51300e5a60300736905e"}, + {file = "pillow-12.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb043ee2f06b41473269765c2feae53fc2e2fbf96e5e22ca94fb5ad677856f06"}, + {file = "pillow-12.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f278f034eb75b4e8a13a54a876cc4a5ab39173d2cdd93a638e1b467fc545ac43"}, + {file = "pillow-12.2.0-cp314-cp314t-win32.whl", hash = "sha256:6bb77b2dcb06b20f9f4b4a8454caa581cd4dd0643a08bacf821216a16d9c8354"}, + {file = "pillow-12.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6562ace0d3fb5f20ed7290f1f929cae41b25ae29528f2af1722966a0a02e2aa1"}, + {file = "pillow-12.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:aa88ccfe4e32d362816319ed727a004423aab09c5cea43c01a4b435643fa34eb"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = 
"sha256:0538bd5e05efec03ae613fd89c4ce0368ecd2ba239cc25b9f9be7ed426b0af1f"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:394167b21da716608eac917c60aa9b969421b5dcbbe02ae7f013e7b85811c69d"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d04bfa02cc2d23b497d1e90a0f927070043f6cbf303e738300532379a4b4e0f"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0c838a5125cee37e68edec915651521191cef1e6aa336b855f495766e77a366e"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a6c9fa44005fa37a91ebfc95d081e8079757d2e904b27103f4f5fa6f0bf78c0"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25373b66e0dd5905ed63fa3cae13c82fbddf3079f2c8bf15c6fb6a35586324c1"}, + {file = "pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e"}, + {file = "pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5"}, ] [package.extras] @@ -1680,20 +1680,20 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pytest" -version = "8.4.2" +version = "9.0.3" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, - {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, + {file = "pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9"}, + {file = "pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c"}, ] 
[package.dependencies] colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} -iniconfig = ">=1" -packaging = ">=20" +iniconfig = ">=1.0.1" +packaging = ">=22" pluggy = ">=1.5,<2" pygments = ">=2.7.2" @@ -2555,4 +2555,4 @@ scikit-learn = ["scikit-learn"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.13" -content-hash = "6697ef1fac9e13c0441b975d31d062222945bb8a35ce63d4d4d14de76951dbb5" +content-hash = "f6aa543516ad128abc176516a873e509c5ddfc3121c2156e8caf03d9ab683ee4" diff --git a/pyproject.toml b/pyproject.toml index 4a46bdb7..9958fe65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ homepage = "https://github.com/KnowledgeCaptureAndDiscovery/somef" contractions = "^0.1.73" chardet = "^5.2.0" imbalanced-learn = "^0.12.0" - pytest = "^8.0.0" + pytest = "^9.0.0" morph-kgc = "^2.7.0" bibtexparser = "^1.4.1" nbformat = "^5.9.2" diff --git a/src/somef/export/json_export.py b/src/somef/export/json_export.py index be1d66b1..0f6faff1 100644 --- a/src/somef/export/json_export.py +++ b/src/somef/export/json_export.py @@ -392,123 +392,125 @@ def format_date(date_string): if constants.CAT_CITATION in repo_data: - # url_cit = [] codemeta_output[constants.CAT_CODEMETA_REFERENCEPUBLICATION] = [] - all_reference_publications = [] - # scholarlyArticles = {} author_orcids = {} + all_reference_publications = [] - for cit in repo_data[constants.CAT_CITATION]: - scholarlyArticle = {"@type": "ScholarlyArticle"} - - doi = None - title = None - is_bibtex = False - - if constants.PROP_FORMAT in cit[constants.PROP_RESULT] and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == "cff": - yaml_content = yaml.safe_load(cit[constants.PROP_RESULT]["value"]) - preferred_citation = yaml_content.get("preferred-citation", {}) - doi = yaml_content.get("doi") or preferred_citation.get("doi") - identifiers = yaml_content.get("identifiers", []) - url_citation = preferred_citation.get("url") or yaml_content.get("url") - identifier_url = next((id["value"] for 
id in identifiers if id["type"] == "url"), None) - identifier_doi = next((id["value"] for id in identifiers if id["type"] == "doi"), None) - - authors = yaml_content.get("authors", []) - - title = normalize_title(preferred_citation.get("title") or yaml_content.get("title")) - - if identifier_doi: - final_url = f"https://doi.org/{identifier_doi}" - elif doi: - final_url = f"https://doi.org/{doi}" - elif identifier_url: - final_url = identifier_url - elif url_citation: - final_url = url_citation + if constants.CAT_CITATION in repo_data: + publications_source = repo_data[constants.CAT_CITATION] + else: + publications_source = [] + + if publications_source: + for cit in publications_source: + scholarlyArticle = {"@type": "ScholarlyArticle"} + + doi = None + title = None + is_bibtex = False + + if constants.PROP_FORMAT in cit[constants.PROP_RESULT] and cit[constants.PROP_RESULT][constants.PROP_FORMAT] == "cff": + yaml_content = yaml.safe_load(cit[constants.PROP_RESULT]["value"]) + preferred_citation = yaml_content.get("preferred-citation", {}) + doi = yaml_content.get("doi") or preferred_citation.get("doi") + identifiers = yaml_content.get("identifiers", []) + url_citation = preferred_citation.get("url") or yaml_content.get("url") + identifier_url = next((id["value"] for id in identifiers if id["type"] == "url"), None) + identifier_doi = next((id["value"] for id in identifiers if id["type"] == "doi"), None) + authors = yaml_content.get("authors", []) or preferred_citation.get("authors", []) + title = normalize_title(preferred_citation.get("title") or yaml_content.get("title")) + + if identifier_doi: + final_url = f"https://doi.org/{identifier_doi}" + elif doi: + final_url = f"https://doi.org/{doi}" + elif identifier_url: + final_url = identifier_url + elif url_citation: + final_url = url_citation + else: + final_url = '' + + scholarlyArticle[constants.PROP_NAME] = title + scholarlyArticle[constants.CAT_IDENTIFIER] = doi + scholarlyArticle[constants.PROP_URL] = final_url 
+ + author_list = [] + for author in authors: + family_name = author.get("family-names") + given_name = author.get("given-names") + orcid = author.get("orcid") + name = author.get("name") + + if family_name and given_name: + author_entry = { + "@type": "Person", + "familyName": family_name, + "givenName": given_name + } + if orcid: + if not orcid.startswith("http"): # check if orcid is a url + orcid = f"https://orcid.org/{orcid}" + author_entry["@id"] = orcid + elif name: + # If there is only a name, we assume this to be an Organization. + # it could be not enough acurate + + author_entry = { + "@type": "Organization", + "name": name + } + + if family_name and given_name and orcid: + key = (family_name.lower(), given_name.lower()) + author_orcids[key] = orcid + + author_list.append({k: v for k, v in author_entry.items() if v is not None}) + + if author_list: + scholarlyArticle[constants.PROP_AUTHOR] = author_list else: - final_url = '' - - scholarlyArticle[constants.PROP_NAME] = title - scholarlyArticle[constants.CAT_IDENTIFIER] = doi - scholarlyArticle[constants.PROP_URL] = final_url - - author_list = [] - for author in authors: - family_name = author.get("family-names") - given_name = author.get("given-names") - orcid = author.get("orcid") - name = author.get("name") - - if family_name and given_name: - author_entry = { - "@type": "Person", - "familyName": family_name, - "givenName": given_name - } - if orcid: - if not orcid.startswith("http"): # check if orcid is a url - orcid = f"https://orcid.org/{orcid}" - author_entry["@id"] = orcid - elif name: - # If there is only a name, we assume this to be an Organization. 
- # it could be not enough acurate - - author_entry = { - "@type": "Organization", - "name": name - } - - if family_name and given_name and orcid: - key = (family_name.lower(), given_name.lower()) - author_orcids[key] = orcid - - author_list.append({k: v for k, v in author_entry.items() if v is not None}) - - if author_list: - scholarlyArticle[constants.PROP_AUTHOR] = author_list - else: - if constants.PROP_DOI in cit[constants.PROP_RESULT].keys(): - doi = cit[constants.PROP_RESULT][constants.PROP_DOI] - scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI] + if constants.PROP_DOI in cit[constants.PROP_RESULT].keys(): + doi = cit[constants.PROP_RESULT][constants.PROP_DOI] + scholarlyArticle[constants.CAT_IDENTIFIER] = cit[constants.PROP_RESULT][constants.PROP_DOI] - if constants.PROP_URL in cit[constants.PROP_RESULT].keys(): - scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL] + if constants.PROP_URL in cit[constants.PROP_RESULT].keys(): + scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_RESULT][constants.PROP_URL] - if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys(): - title = normalize_title(cit[constants.PROP_RESULT][constants.PROP_TITLE]) - scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE] + if constants.PROP_TITLE in cit[constants.PROP_RESULT].keys(): + title = normalize_title(cit[constants.PROP_RESULT][constants.PROP_TITLE]) + scholarlyArticle[constants.PROP_NAME] = cit[constants.PROP_RESULT][constants.PROP_TITLE] - if constants.PROP_ORIGINAL_HEADER in cit[constants.PROP_RESULT].keys(): - if cit[constants.PROP_RESULT][constants.PROP_ORIGINAL_HEADER] == "Citation": - if constants.PROP_SOURCE in cit.keys(): - scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_SOURCE] + if constants.PROP_ORIGINAL_HEADER in cit[constants.PROP_RESULT].keys(): + if cit[constants.PROP_RESULT][constants.PROP_ORIGINAL_HEADER] == "Citation": + if 
constants.PROP_SOURCE in cit.keys(): + scholarlyArticle[constants.PROP_URL] = cit[constants.PROP_SOURCE] - is_bibtex = True + is_bibtex = True - if len(scholarlyArticle) > 1: - # look por information in values as pagination, issn and others - if re.search(r'@\w+\{', cit[constants.PROP_RESULT][constants.PROP_VALUE]): - scholarlyArticle = extract_scholarly_article_properties(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle, 'CODEMETA') - else: - scholarlyArticle = extract_scholarly_article_natural(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle, 'CODEMETA') + if len(scholarlyArticle) > 1: + # look por information in values as pagination, issn and others + if re.search(r'@\w+\{', cit[constants.PROP_RESULT][constants.PROP_VALUE]): + scholarlyArticle = extract_scholarly_article_properties(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle, 'CODEMETA') + else: + scholarlyArticle = extract_scholarly_article_natural(cit[constants.PROP_RESULT][constants.PROP_VALUE], scholarlyArticle, 'CODEMETA') - all_reference_publications.append({ - **scholarlyArticle, - "_source_format": "cff" if not is_bibtex else "bibtex" - }) + all_reference_publications.append({ + **scholarlyArticle, + "_source_format": "cff" if not is_bibtex else "bibtex" + }) + + for article in all_reference_publications: + if "author" in article: + for author in article["author"]: + family_name = author.get("familyName", "").strip() + given_name = author.get("givenName", "").strip() + key = (family_name.lower(), given_name.lower()) if given_name else None - for article in all_reference_publications: - if "author" in article: - for author in article["author"]: - family_name = author.get("familyName", "").strip() - given_name = author.get("givenName", "").strip() - key = (family_name.lower(), given_name.lower()) if given_name else None + if key and key in author_orcids: + author["@id"] = author_orcids[key] - if key and key in author_orcids: - author["@id"] = 
author_orcids[key] - - codemeta_output[constants.CAT_CODEMETA_REFERENCEPUBLICATION] = deduplicate_publications(all_reference_publications) + codemeta_output[constants.CAT_CODEMETA_REFERENCEPUBLICATION] = deduplicate_publications(all_reference_publications) if constants.CAT_STATUS in repo_data: url_status = repo_data[constants.CAT_STATUS][0]['result'].get('value', '') @@ -585,6 +587,16 @@ def format_date(date_string): raw_contributors = repo_data[constants.CAT_CONTRIBUTORS] codemeta_output[constants.CAT_CODEMETA_CONTRIBUTOR] = parse_contributors(raw_contributors) + if constants.CAT_FUNDING in repo_data: + for funding_entry in repo_data[constants.CAT_FUNDING]: + res_fun = funding_entry[constants.PROP_RESULT] + + if constants.PROP_FUNDING in res_fun and res_fun[constants.PROP_FUNDING] != "": + codemeta_output[constants.CAT_CODEMETA_FUNDING] = res_fun[constants.PROP_FUNDING] + + if constants.PROP_FUNDER in res_fun and res_fun[constants.PROP_FUNDER] != "": + codemeta_output[constants.CAT_CODEMETA_FUNDER] = res_fun[constants.PROP_FUNDER] + # A person is expected, and we extract text at the moment if descriptions_text: codemeta_output[constants.CAT_CODEMETA_DESCRIPTION] = descriptions_text @@ -840,7 +852,6 @@ def unify_results(repo_data: dict) -> dict: This function canonicalizes simple values, detects equivalent items and merges them into a single unified entry while preserving all available information. 
""" - print("Unifying results...") unified_data = {} for category, items in repo_data.items(): @@ -858,16 +869,29 @@ def unify_results(repo_data: dict) -> dict: value = result.get(constants.PROP_VALUE) value_type = result.get(constants.PROP_TYPE) - canonical = canonicalize_value(value, value_type) + # --- SPECIAL LOGIC FOR LICENSES --- + if category == constants.CAT_LICENSE and result.get(constants.PROP_SPDX_ID): + # If we have SPDX, that is our unification key + key = f"LICENSE-{result[constants.PROP_SPDX_ID]}" + else: + # Normal behavior for the rest of the categories + canonical = canonicalize_value(value, value_type) + key = str(canonical) + # -------------------------------------------------- + # canonical = canonicalize_value(value, value_type) - key = str(canonical) + # key = str(canonical) if key in seen: existing = seen[key] - - # If types match, merge normally - existing[constants.PROP_RESULT][constants.PROP_VALUE] = choose_more_general( - existing[constants.PROP_RESULT][constants.PROP_VALUE], value - ) + if category == constants.CAT_LICENSE: + # prefer SPDX ID if available for licenses + if result.get(constants.PROP_SPDX_ID): + existing[constants.PROP_RESULT][constants.PROP_VALUE] = result[constants.PROP_SPDX_ID] + else: + # If types match, merge normally + existing[constants.PROP_RESULT][constants.PROP_VALUE] = choose_more_general( + existing[constants.PROP_RESULT][constants.PROP_VALUE], value + ) # merge other result fields because different techniques might have extracted different information # (e.g., email in authors extracted by file exploration or code parser. 
diff --git a/src/somef/extract_software_type.py b/src/somef/extract_software_type.py index 1d21f5bd..a9ad9600 100644 --- a/src/somef/extract_software_type.py +++ b/src/somef/extract_software_type.py @@ -16,7 +16,7 @@ def check_repository_type(path_repo, title, metadata_result: Result): output depending on the software type or if the repository is not considered software""" if check_static_websites(path_repo, metadata_result): - metadata_result.add_result(constants.CAT_TYPE, + metadata_result.add_result(constants.CAT_APPLICATION_TYPE, { constants.PROP_VALUE: 'static-website', constants.PROP_TYPE: constants.STRING @@ -24,7 +24,7 @@ def check_repository_type(path_repo, title, metadata_result: Result): 1, constants.TECHNIQUE_HEURISTICS) elif check_ontologies(path_repo): - metadata_result.add_result(constants.CAT_TYPE, + metadata_result.add_result(constants.CAT_APPLICATION_TYPE, { constants.PROP_VALUE: 'ontology', constants.PROP_TYPE: constants.STRING @@ -32,7 +32,7 @@ def check_repository_type(path_repo, title, metadata_result: Result): 1, constants.TECHNIQUE_HEURISTICS) elif check_notebooks(path_repo): - metadata_result.add_result(constants.CAT_TYPE, + metadata_result.add_result(constants.CAT_APPLICATION_TYPE, { constants.PROP_VALUE: 'notebook-application', constants.PROP_TYPE: constants.STRING @@ -51,7 +51,7 @@ def check_repository_type(path_repo, title, metadata_result: Result): elif check_command_line(path_repo): """The 0.82 confidence result is from running the analysis on 300 repos and showing the precision of the heuristic""" - metadata_result.add_result(constants.CAT_TYPE, + metadata_result.add_result(constants.CAT_APPLICATION_TYPE, { constants.PROP_VALUE: 'commandline-application', constants.PROP_TYPE: constants.STRING @@ -60,7 +60,7 @@ def check_repository_type(path_repo, title, metadata_result: Result): constants.TECHNIQUE_HEURISTICS) elif check_extras(path_repo): - metadata_result.add_result(constants.CAT_TYPE, + 
metadata_result.add_result(constants.CAT_APPLICATION_TYPE, { constants.PROP_VALUE: 'non-software', constants.PROP_TYPE: constants.STRING diff --git a/src/somef/parser/bower_parser.py b/src/somef/parser/bower_parser.py index 85d6f0e3..6ba4046f 100644 --- a/src/somef/parser/bower_parser.py +++ b/src/somef/parser/bower_parser.py @@ -138,7 +138,7 @@ def parse_bower_json_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "bower" }, @@ -157,7 +157,7 @@ def parse_bower_json_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "bower" }, diff --git a/src/somef/parser/cabal_parser.py b/src/somef/parser/cabal_parser.py index 5941a594..cbdfe2cd 100644 --- a/src/somef/parser/cabal_parser.py +++ b/src/somef/parser/cabal_parser.py @@ -207,7 +207,7 @@ def parse_cabal_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version_constraint, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "cabal" }, diff --git a/src/somef/parser/codemeta_parser.py b/src/somef/parser/codemeta_parser.py index a3f6028e..c4e9be93 100644 --- a/src/somef/parser/codemeta_parser.py +++ b/src/somef/parser/codemeta_parser.py @@ -63,9 +63,21 @@ def parse_license(license_data): spdx_id = identifier.split("spdx.org/licenses/")[-1].split("/")[0] license_info["spdx_id"] = spdx_id elif isinstance(license_data, str): - license_info["name"] = license_data - license_info["identifier"] = f"https://spdx.org/licenses/{license_data}" - license_info["spdx_id"] = 
license_data + # license_info["name"] = license_data + # license_info["identifier"] = f"https://spdx.org/licenses/{license_data}" + # license_info["spdx_id"] = license_data + license_str = license_data.strip() + + if "spdx.org/licenses/" in license_str: + # Already a full SPDX URL + license_info["identifier"] = license_str + license_info["name"] = license_str.split("/")[-1] + license_info["spdx_id"] = license_info["name"] + else: + # we assume it's an spdx id like "MIT" + license_info["name"] = license_str + license_info["identifier"] = f"https://spdx.org/licenses/{license_str}" + license_info["spdx_id"] = license_str else: return None return license_info @@ -486,25 +498,9 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): for pub in ref_publications: pub_data = parse_referenced_publication(pub) - if pub_data: - - result_dict = { - "value": pub_data.get("title", ""), - "title": pub_data.get("title", ""), - "type": constants.SCHOLARLY_ARTICLE - } - - if pub_data.get("url"): - result_dict["url"] = pub_data.get("url") - - if pub_data.get("date_published"): - result_dict["date_published"] = pub_data.get("date_published") - - if pub_data.get("identifier"): - result_dict["doi"] = pub_data.get("identifier") - + result_dict = map_reference_publication(pub_data) + if result_dict: metadata_result.add_result( - # constants.CAT_REF_PUBLICATION, constants.CAT_CITATION, result_dict, 1, @@ -514,24 +510,9 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): elif isinstance(ref_publications, dict): pub_data = parse_referenced_publication(ref_publications) - if pub_data: - result_dict = { - "value": pub_data.get("title", ""), - "title": pub_data.get("title", ""), - "type": constants.SCHOLARLY_ARTICLE - } - - if pub_data.get("url"): - result_dict["url"] = pub_data.get("url") - - if pub_data.get("date_published"): - result_dict["date_published"] = pub_data.get("date_published") - - if pub_data.get("identifier"): - result_dict["doi"] = 
pub_data.get("identifier") - + result_dict = map_reference_publication(pub_data) + if result_dict: metadata_result.add_result( - # constants.CAT_REF_PUBLICATION, constants.CAT_CITATION, result_dict, 1, @@ -540,7 +521,6 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): ) else: metadata_result.add_result( - # constants.CAT_REF_PUBLICATION, constants.CAT_CITATION, { "value": data["referencePublication"], @@ -551,37 +531,35 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): source ) - if "funding" in data: - funding_data = data["funding"] - if isinstance(funding_data, list): - for fund in funding_data: - fund_info = parse_funding(fund) - if fund_info: - metadata_result.add_result( - constants.CAT_FUNDING, - { - "funder": fund_info.get("funder", ""), - "funding": fund_info.get("funding", ""), - "type": constants.STRING - }, - 1, - constants.TECHNIQUE_CODE_CONFIG_PARSER, - source - ) - elif isinstance(funding_data, dict): - fund_info = parse_funding(funding_data) - if fund_info: - metadata_result.add_result( - constants.CAT_FUNDING, - { - "funder": fund_info.get("funder", ""), - "funding": fund_info.get("funding", ""), - "type": constants.STRING - }, - 1, - constants.TECHNIQUE_CODE_CONFIG_PARSER, - source - ) + + funder_data = data.get("funder") + funding_data = data.get("funding") + + if funder_data or funding_data: + main_value = funding_data if funding_data else funder_data + + if isinstance(main_value, (list, dict)): + main_value = str(main_value) + + res_fund = { + "value": main_value, + "type": constants.STRING + } + + if funder_data and (not isinstance(funder_data, list) or len(funder_data) > 0): + res_fund[constants.PROP_FUNDER] = funder_data + + if funding_data and (not isinstance(funding_data, list) or len(funding_data) > 0): + res_fund[constants.PROP_FUNDING] = funding_data + + if res_fund.get("value"): + metadata_result.add_result( + constants.CAT_FUNDING, + res_fund, + 1, + 
constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) if "developmentStatus" in data: metadata_result.add_result( @@ -712,10 +690,16 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): ) if "license" in data: - license_info = parse_license(data["license"]) + license_raw = data["license"] + license_info = parse_license(license_raw) if license_info: + if isinstance(license_raw, str): + val_lic = license_raw + else: + val_lic = license_info.get("name", "") + result_dict = { - "value": license_info.get("name", ""), + "value": val_lic, "type": constants.LICENSE } @@ -745,7 +729,7 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): # "version": requirement.get("version"), **({"name": requirement["name"]} if "name" in requirement else {}), **({"version": requirement["version"]} if "version" in requirement else {}), - "type": constants.SOFTWARE_APPLICATION + "type": constants.SOFTWARE_DEPENDENCY }, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, @@ -780,3 +764,63 @@ def parse_codemeta_json_file(file_path, metadata_result: Result, source): return metadata_result +def map_codemeta_author(author): + given = author.get("givenName") + family = author.get("familyName") + name = author.get("name") + + if not name and (given or family): + name = f"{given or ''} {family or ''}".strip() + + mapped = { + constants.PROP_TYPE: constants.AGENT, + constants.PROP_NAME: name, + constants.PROP_GIVEN_NAME: given, + constants.PROP_FAMILY_NAME: family + } + + identifier = author.get("identifier") or author.get("@id") + if isinstance(identifier, str) and "orcid.org" in identifier: + mapped[constants.PROP_URL] = identifier + + return {k: v for k, v in mapped.items() if v is not None} + +def map_reference_publication(pub_data): + if not pub_data: + return None + + result = { + constants.PROP_VALUE: pub_data.get("title", ""), + constants.PROP_TITLE: pub_data.get("title", ""), + constants.PROP_TYPE: constants.SCHOLARLY_ARTICLE + } + + if 
pub_data.get("url"): + result[constants.PROP_URL] = pub_data.get("url") + + if pub_data.get("date_published"): + result[constants.PROP_DATE_PUBLISHED] = pub_data.get("date_published") + + if pub_data.get("identifier"): + result[constants.PROP_DOI] = pub_data.get("identifier") + + authors_raw = pub_data.get("author") + + if authors_raw: + if isinstance(authors_raw, dict): + authors_iter = [authors_raw] + elif isinstance(authors_raw, list): + authors_iter = authors_raw + else: + authors_iter = [] + + mapped_authors = [ + map_codemeta_author(a) + for a in authors_iter + if isinstance(a, dict) + ] + + result["authors"] = mapped_authors + + return result + diff --git a/src/somef/parser/composer_parser.py b/src/somef/parser/composer_parser.py index 9908abce..32281155 100644 --- a/src/somef/parser/composer_parser.py +++ b/src/somef/parser/composer_parser.py @@ -169,7 +169,7 @@ def parse_composer_json(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": dep_type, "dependency_resolver": "composer" }, diff --git a/src/somef/parser/conda_environment_parser.py b/src/somef/parser/conda_environment_parser.py index b1c25e38..adbdcac2 100644 --- a/src/somef/parser/conda_environment_parser.py +++ b/src/somef/parser/conda_environment_parser.py @@ -49,7 +49,7 @@ def parse_conda_environment_file(file_path, metadata_result: Result, source): dep_dict = { constants.PROP_VALUE: dep, constants.PROP_NAME: re.split(r"[=<>!]", dep)[0], - constants.PROP_TYPE: constants.SOFTWARE_APPLICATION, + constants.PROP_TYPE: constants.SOFTWARE_DEPENDENCY, constants.PROP_DEPENDENCY_TYPE: constants.DEPENDENCY_TYPE_RUNTIME, constants.PROP_DEPENDENCY_RESOLVER: "conda" } @@ -71,7 +71,7 @@ def parse_conda_environment_file(file_path, metadata_result: Result, source): dep_dict = { constants.PROP_VALUE: dep, constants.PROP_NAME: re.split(r"[=<>!~]", dep)[0], - 
constants.PROP_TYPE: constants.SOFTWARE_APPLICATION, + constants.PROP_TYPE: constants.SOFTWARE_DEPENDENCY, constants.PROP_DEPENDENCY_TYPE: constants.DEPENDENCY_TYPE_RUNTIME, constants.PROP_DEPENDENCY_RESOLVER: "pip" } diff --git a/src/somef/parser/description_parser.py b/src/somef/parser/description_parser.py index 6ec783b4..23854e0a 100644 --- a/src/somef/parser/description_parser.py +++ b/src/somef/parser/description_parser.py @@ -211,7 +211,7 @@ def parse_description_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION + "type": constants.SOFTWARE_DEPENDENCY }, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, diff --git a/src/somef/parser/gemspec_parser.py b/src/somef/parser/gemspec_parser.py index fa213bbc..2adffaa3 100644 --- a/src/somef/parser/gemspec_parser.py +++ b/src/somef/parser/gemspec_parser.py @@ -168,7 +168,7 @@ def parse_gemspec_file(file_path, metadata_result: Result, source): constants.CAT_REQUIREMENTS, { "value": dependencies, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY }, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, @@ -188,7 +188,7 @@ def parse_gemspec_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "bundler" }, @@ -209,7 +209,7 @@ def parse_gemspec_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_DEVELOPMENT, "dependency_resolver": "bundler" }, diff --git a/src/somef/parser/package_json_parser.py b/src/somef/parser/package_json_parser.py index d13ea1b1..1de2f735 100644 --- a/src/somef/parser/package_json_parser.py +++ 
b/src/somef/parser/package_json_parser.py @@ -179,7 +179,7 @@ def parse_package_json_file(file_path, metadata_result: Result, source): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": dep_type, "dependency_resolver": "npm" }, diff --git a/src/somef/parser/pom_xml_parser.py b/src/somef/parser/pom_xml_parser.py index 68fd2603..9696bbd9 100644 --- a/src/somef/parser/pom_xml_parser.py +++ b/src/somef/parser/pom_xml_parser.py @@ -174,7 +174,7 @@ def parse_pom_file(file_path, metadata_result: Result, source): "value": f'{dependency.get("groupId", "")}.{dependency.get("artifactId", "")}'.strip("."), "name": name_d, "version": version_d, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": dep_type, "dependency_resolver": "maven" }, diff --git a/src/somef/parser/publiccode_parser.py b/src/somef/parser/publiccode_parser.py index dc028cf4..169d6438 100644 --- a/src/somef/parser/publiccode_parser.py +++ b/src/somef/parser/publiccode_parser.py @@ -231,7 +231,7 @@ def parse_publiccode_file(file_path, metadata_result: Result, source): "value": f"{name}{version_str}" if version_str else name, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "publiccode" }, diff --git a/src/somef/parser/python_parser.py b/src/somef/parser/python_parser.py index 7c062da8..0cefae2e 100644 --- a/src/somef/parser/python_parser.py +++ b/src/somef/parser/python_parser.py @@ -97,7 +97,7 @@ def parse_requirements_txt(file_path, metadata_result: Result, source): req = { "value": line, "name": name, - "type": constants.SOFTWARE_APPLICATION + "type": constants.SOFTWARE_DEPENDENCY } if version: req['version'] = version diff --git a/src/somef/parser/toml_parser.py b/src/somef/parser/toml_parser.py index 
04f3e5ec..64ad254d 100644 --- a/src/somef/parser/toml_parser.py +++ b/src/somef/parser/toml_parser.py @@ -340,9 +340,10 @@ def parse_cargo_metadata(data, metadata_result, source, file_path): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": dep_type, - "dependency_resolver": "cargo" }, + "dependency_resolver": "cargo" + }, 1, constants.TECHNIQUE_CODE_CONFIG_PARSER, source @@ -362,7 +363,7 @@ def parse_cargo_metadata(data, metadata_result, source, file_path): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": dep_type, "dependency_resolver": "cargo" }, @@ -405,7 +406,7 @@ def parse_pyproject_metadata(data, metadata_result, source, file_path): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type":constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "python" }, @@ -422,7 +423,7 @@ def parse_pyproject_metadata(data, metadata_result, source, file_path): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "python" }, @@ -444,7 +445,7 @@ def parse_pyproject_metadata(data, metadata_result, source, file_path): "value": req, "name": name, "version": version, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "python" }, @@ -573,7 +574,7 @@ def parse_julia_project_metadata(data, metadata_result, source): { "value": req, "name": req, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_RUNTIME, "dependency_resolver": "julia" }, @@ -598,7 +599,7 @@ 
def parse_julia_project_metadata(data, metadata_result, source): { "value": req, "name": req, - "type": constants.SOFTWARE_APPLICATION, + "type": constants.SOFTWARE_DEPENDENCY, "dependency_type": constants.DEPENDENCY_TYPE_DEVELOPMENT, "dependency_resolver": "julia" }, diff --git a/src/somef/process_files.py b/src/somef/process_files.py index 9edc6baf..06a4c566 100644 --- a/src/somef/process_files.py +++ b/src/somef/process_files.py @@ -267,6 +267,7 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner filename.lower() == "pyproject.toml" or filename.lower() == "setup.py" or filename.endswith(".gemspec") or \ filename.lower() == "requirements.txt" or filename.lower() == "bower.json" or filename == "DESCRIPTION" or \ (filename.lower() == "environment.yml" or filename.lower() == "environment.yaml") or \ + (filename.lower() == ".zenodo.json") or \ (filename.lower() == "cargo.toml" and repo_relative_path == ".") or (filename.lower() == "composer.json" and repo_relative_path == ".") or \ (filename == "Project.toml" or (filename.lower()== "publiccode.yml" or filename.lower()== "publiccode.yaml") and repo_relative_path == "."): if filename.lower() in parsed_build_files and repo_relative_path != ".": @@ -309,9 +310,9 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner if filename.lower() == "publiccode.yml" or filename.lower() == "publiccode.yaml": metadata_result = parse_publiccode_file(os.path.join(dir_path, filename), metadata_result, build_file_url) if filename.lower() == "environment.yml" or filename.lower() == "environment.yaml": - print("Processing conda environment file...") metadata_result = parse_conda_environment_file(os.path.join(dir_path, filename), metadata_result, build_file_url) - + # if filename.lower() == ".zenodo": + # metadata_result = parse_zenodo_file(os.path.join(dir_path, filename), metadata_result, build_file_url) parsed_build_files.add(filename.lower()) # if repo_type == 
constants.RepositoryType.GITLAB: @@ -370,8 +371,10 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner if 'citation' in metadata_result.results: for cit in metadata_result.results['citation']: - scholarly_article = {} result = cit.get(constants.PROP_RESULT, {}) + + scholarly_article = {} + # result = cit.get(constants.PROP_RESULT, {}) value = result.get(constants.PROP_VALUE, '') if re.search(r'@\w+\{', value): scholarly_article = extract_scholarly_article_properties(value, scholarly_article, 'JSON') @@ -503,7 +506,9 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul if license_info: result[constants.PROP_NAME] = license_info['name'] result[constants.PROP_SPDX_ID] = license_info['spdx_id'] - + if '@id' in license_info: + result[constants.PROP_URL] = license_info['@id'] + result[constants.PROP_IDENTIFIER] = license_info['@id'] # Extraction copyright holder from license text matches_copyright = re.findall(constants.REGEXP_COPYRIGHT, license_text, flags=re.IGNORECASE) @@ -564,70 +569,39 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul ) # Properties extraction from cff if format_result == 'cff': - yaml_content = yaml.safe_load(file_text) - preferred_citation = yaml_content.get("preferred-citation", {}) - doi = yaml_content.get("doi") or preferred_citation.get("doi") - identifiers = yaml_content.get("identifiers", []) - url_citation = preferred_citation.get("url") or yaml_content.get("url") - - if identifiers: - result[constants.CAT_IDENTIFIER] = identifiers - - identifier_url = next((id["value"] for id in identifiers if id["type"] == "url"), None) - identifier_doi = next((id["value"] for id in identifiers if id["type"] == "doi"), None) - - title = yaml_content.get("title") or preferred_citation.get("title", None) - authors = yaml_content.get("authors", []) - - if identifier_doi: - final_url = f"https://doi.org/{identifier_doi}" - elif doi: - final_url = 
f"https://doi.org/{doi}" - elif identifier_url: - final_url = identifier_url - elif url_citation: - final_url = url_citation - else: - final_url = '' - - author_list = [] - for author in authors: - family_name = author.get("family-names") - given_name = author.get("given-names") - orcid = author.get("orcid") - name = author.get("name") - - if family_name and given_name: - author_entry = { - "type": "Agent", - "name": f"{given_name} {family_name}", - "family_name": family_name, - "given_name": given_name - } - if orcid: - if not orcid.startswith("http"): # check if is a url - orcid = f"https://orcid.org/{orcid}" - author_entry["url"] = orcid - elif name: - # If there is only a name, we assume this to be an Organization. - # it could be not enough acurate - - author_entry = { - "type": "Agent", - "name": name - } - - author_list.append({k: v for k, v in author_entry.items() if v is not None}) + try: + yaml_content = yaml.safe_load(file_text) + except Exception: + yaml_content = None + + if yaml_content: + license_value = yaml_content.get("license") + logging.info(f"Extracted license value from CFF: {license_value}") + if license_value: + if isinstance(license_value, list): + license_value = license_value[0] + parse_license_cff(license_value, metadata_result, url) + + root_result = parse_cff_root(yaml_content, metadata_result,url) + root_result[constants.PROP_VALUE] = file_text + # root_result[constants.PROP_TYPE] = constants.FILE_DUMP + metadata_result.add_result( + category, root_result, 1, + constants.TECHNIQUE_FILE_EXPLORATION, url + ) - if author_list: - result[constants.PROP_AUTHOR] = author_list - if title: - result[constants.PROP_TITLE] = title - if final_url: - result[constants.PROP_URL] = final_url - if doi: - result[constants.PROP_DOI] = doi + pref = yaml_content.get("preferred-citation") + if pref: + pref_result = parse_cff_preferred(pref) + pref_result[constants.PROP_VALUE] = yaml.dump({"preferred-citation": pref}, default_flow_style=False) + # 
pref_result[constants.PROP_TYPE] = constants.FILE_DUMP + metadata_result.add_result( + constants.CAT_CITATION, pref_result, 1, + constants.TECHNIQUE_FILE_EXPLORATION, url + ) + return metadata_result + if format_result != "": result[constants.PROP_FORMAT] = format_result @@ -635,7 +609,8 @@ def get_file_content_or_link(repo_type, file_path, owner, repo_name, repo_defaul metadata_result.edit_hierarchical_result(category, result, 1, constants.TECHNIQUE_FILE_EXPLORATION, url) else: metadata_result.add_result(category, result, 1, constants.TECHNIQUE_FILE_EXPLORATION, url) - except: + except Exception as e: + logging.error(f"Error occurred while processing file {url}: {e}") if replace: metadata_result.edit_hierarchical_result(category, { @@ -709,3 +684,109 @@ def clean_text(text): cleaned_lines.append(line) return "\n".join(cleaned_lines) + +def parse_authors_citation(author_list): + authors = [] + for author in author_list: + family = author.get("family-names") + given = author.get("given-names") + orcid = author.get("orcid") + name = author.get(constants.PROP_NAME) + + if family and given: + entry = { + constants.PROP_TYPE: "Agent", + constants.PROP_NAME: f"{given} {family}", + constants.PROP_FAMILY_NAME: family, + constants.PROP_GIVEN_NAME: given + } + if orcid: + if not orcid.startswith("http"): + orcid = f"https://orcid.org/{orcid}" + entry[constants.PROP_URL] = orcid + elif name: + entry = { + constants.PROP_TYPE: "Agent", + constants.PROP_NAME: name + } + else: + continue + + authors.append(entry) + + return authors + + +def parse_cff_root(yaml_content, metadata_result, url): + result = {} + + result[constants.PROP_TITLE] = yaml_content.get("title") + result["authors"] = parse_authors_citation(yaml_content.get("authors", [])) + result[constants.PROP_VERSION] = yaml_content.get("version") + result[constants.PROP_DOI] = yaml_content.get("doi") + result[constants.PROP_URL] = yaml_content.get("url") + result[constants.PROP_TYPE] = constants.SOFTWARE_APPLICATION + # 
cff_type = yaml_content.get("type") + # result[constants.PROP_TYPE] = cff_type if cff_type else constants.FILE_DUMP + + identifiers = yaml_content.get("identifiers", []) + if identifiers: + result[constants.PROP_IDENTIFIER] = identifiers + + result[constants.PROP_FORMAT] = "cff" + + return clean_nulls(result) + +def parse_cff_preferred(pref): + result = {} + + result[constants.PROP_TITLE] = pref.get("title") + result["authors"] = parse_authors_citation(pref.get("authors", [])) + result[constants.PROP_DOI] = pref.get("doi") + result[constants.PROP_URL] = pref.get("url") + result[constants.PROP_JOURNAL] = pref.get("journal") + result[constants.PROP_YEAR] = pref.get("year") + result[constants.PROP_PAGES] = pref.get("pages") + result[constants.PROP_TYPE] = constants.SCHOLARLY_ARTICLE + # cff_type = pref.get("type") + # result[constants.PROP_TYPE] = cff_type if cff_type else constants.FILE_DUMP + + result[constants.PROP_PREFERRED_CITATION] = "True" + result[constants.PROP_FORMAT] = "cff" + + return clean_nulls(result) + +def clean_nulls(d: dict) -> dict: + return {k: v for k, v in d.items() if v not in (None, "")} + +def parse_license_cff(license_value, metadata_result, url): + + try: + license_info = detect_license_spdx(license_value, 'JSON') + + license_result = { + constants.PROP_VALUE: license_value, + constants.PROP_TYPE: constants.FILE_DUMP + } + + if license_info: + license_result[constants.PROP_NAME] = license_info['name'] + license_result[constants.PROP_SPDX_ID] = license_info['spdx_id'] + + license_result[constants.PROP_URL] = license_info.get("@id") + else: + license_result[constants.PROP_NAME] = license_value + + + metadata_result.add_result( + constants.CAT_LICENSE, + license_result, + 1, + constants.TECHNIQUE_FILE_EXPLORATION, + url + ) + except Exception as e: + logging.error(f"Error parsing license from CFF: {str(e)}") + + + diff --git a/src/somef/process_repository.py b/src/somef/process_repository.py index 79932aae..7b4235e8 100644 --- 
a/src/somef/process_repository.py +++ b/src/somef/process_repository.py @@ -330,10 +330,11 @@ def load_gitlab_repository_metadata(repo_metadata: Result, repository_url): license_result[constants.PROP_NAME] = general_resp["license"]["name"] if "url" in general_resp['license']: license_result[constants.PROP_VALUE] = general_resp["license"]["url"] - - # for k in ('name', 'url'): - # if k in general_resp['license']: - # license_info[k] = general_resp['license'][k] + temp_info_lic = detect_license_spdx(general_resp["license"]["name"], 'JSON') + if temp_info_lic: + license_result[constants.PROP_SPDX_ID] = temp_info_lic['spdx_id'] + license_result[constants.PROP_URL] = temp_info_lic['url'] + license_result[constants.PROP_IDENTIFIER] = temp_info_lic['identifier'] # If we didn't find it, look for the license if constants.PROP_VALUE not in license_result or license_result[constants.PROP_VALUE] is None: @@ -347,6 +348,7 @@ def load_gitlab_repository_metadata(repo_metadata: Result, repository_url): if license_info: license_result[constants.PROP_NAME] = license_info['name'] license_result[constants.PROP_SPDX_ID] = license_info['spdx_id'] + license_result[constants.PROP_IDENTIFIER] = license_info['identifier'] if constants.PROP_VALUE in license_result: repo_metadata.add_result(constants.CAT_LICENSE, license_result, 1, constants.TECHNIQUE_GITLAB_API) @@ -646,7 +648,11 @@ def load_online_repository_metadata(repository_metadata: Result, repository_url, constants.PROP_URL: value["url"] } if "spdx_id" in value.keys(): - result[constants.PROP_SPDX_ID] = value["spdx_id"] + spdx_id = value["spdx_id"] + spdx_url = f"https://spdx.org/licenses/{spdx_id}" + result[constants.PROP_SPDX_ID] = spdx_id + result[constants.PROP_URL] = spdx_url + result[constants.PROP_IDENTIFIER] = spdx_url elif category == constants.CAT_OWNER: result = { constants.PROP_VALUE: value, diff --git a/src/somef/regular_expressions.py b/src/somef/regular_expressions.py index 5d5e6538..f11a34ff 100644 --- 
a/src/somef/regular_expressions.py +++ b/src/somef/regular_expressions.py @@ -1013,24 +1013,33 @@ def detect_license_spdx(license_text, type): for license_name, license_info in constants.LICENSES_DICT.items(): if re.search(license_info["regex"], license_text, re.IGNORECASE): + spdx_id = license_info['spdx_id'] + spdx_url = f"https://spdx.org/licenses/{spdx_id}" if type == 'JSON': return { "name": license_name, "spdx_id": f"{license_info['spdx_id']}", - "@id": f"https://spdx.org/licenses/{license_info['spdx_id']}" + "@id": spdx_url, + "url": spdx_url, + "identifier": spdx_url } else: return { "name": license_name, - "identifier": f"https://spdx.org/licenses/{license_info['spdx_id']}" + "identifier": spdx_url, + "spdx_id": spdx_id, + "url": spdx_url } for license_name, license_info in constants.LICENSES_DICT.items(): spdx_id = license_info["spdx_id"] if re.search(rf'\b{re.escape(spdx_id)}\b', license_text, re.IGNORECASE): + spdx_url = f"https://spdx.org/licenses/{spdx_id}" return { "name": license_name, "spdx_id": spdx_id, - "@id": f"https://spdx.org/licenses/{spdx_id}" + "@id": spdx_url, + "identifier": spdx_url, + "url": spdx_url } return None @@ -1062,7 +1071,8 @@ def extract_scholarly_article_properties(bibtex_entry, scholarlyArticle, type): year_match = re.search(constants.REGEXP_YEAR, bibtex_entry) month_match = re.search(constants.REGEXP_MONTH, bibtex_entry) pages_match = re.search(constants.REGEXP_PAGES, bibtex_entry) - author_match = re.search(r'author\s*=\s*\{([^}]+)\}', bibtex_entry) + # author_match = re.search(r'author\s*=\s*\{([^}]+)\}', bibtex_entry) + author_match = re.search(r'author\s*=\s*\{(.+?)\}\s*,', bibtex_entry) orcid_match = re.search(r'orcid\s*=\s*\{([^}]+)\}', bibtex_entry) # Look for ORCID explícit note_orcid_match = re.search(r'ORCID[:\s]*([\d-]+X?)', bibtex_entry) # Look in notes @@ -1087,12 +1097,24 @@ def extract_scholarly_article_properties(bibtex_entry, scholarlyArticle, type): authors = author_match.group(1).split(" and ") # Split 
several authors for author in authors: - parts = author.split(", ") - if len(parts) == 2: + # parts = author.split(", ") + # if len(parts) == 2: + # family_name, given_name = parts + # else: + # family_name = author + # given_name = None + match_author = re.match(r'(.+?)\s*\{(.+?)\}', author) + + if match_author: + given_name = match_author.group(1).strip() + family_name = match_author.group(2).strip() + elif "," in author: + parts = [p.strip() for p in author.split(",", 1)] family_name, given_name = parts else: - family_name = author - given_name = None + parts = author.split() + family_name = parts[-1] + given_name = " ".join(parts[:-1]) if len(parts) > 1 else None if type == 'JSON': author_entry = { diff --git a/src/somef/test/test_JSON_export.py b/src/somef/test/test_JSON_export.py index afaf5b14..eee0dafb 100644 --- a/src/somef/test/test_JSON_export.py +++ b/src/somef/test/test_JSON_export.py @@ -156,7 +156,7 @@ def test_issue_629(self): "doi" in entry.get("result", {}) and "title" in entry.get("result", {}) for entry in citation - ), "Citation.cff must have doi and title" + ), "Citation.cff must have doi and title in the result" # os.remove(test_data_path + "test_issue_629.json") @@ -528,7 +528,7 @@ def test_unify_json(self): json_content = json.load(f) requirements = json_content.get(constants.CAT_REQUIREMENTS, []) - print(json.dumps(requirements, indent=2)) + # print(json.dumps(requirements, indent=2)) unified_reqs = [ r for r in requirements if "You will need Java 1.8" in r["result"].get("value", "") ] assert unified_reqs, "There should be at least one unified Java requirement entry" @@ -587,6 +587,62 @@ def test_unify_json_2(self): os.remove(test_data_path + "test_somef_unify.json") + + def test_new_properties_citation_issue_935(self): + """ + Checks that duplicated requirement entries extracted by different techniques + are unified into a single item, preserving all complementary information + (techniques, sources, and result fields). 
+ """ + + output_path = test_data_path + 'test_new_properties_citation_issue_935.json' + + somef_cli.run_cli( threshold=0.8, + local_repo=test_data_repositories + "somef_repo", + doc_src=None, + in_file=None, + output=output_path, + graph_out=None, + graph_format="turtle", + codemeta_out=None, + pretty=True, + missing=False, + readme_only=False) + + + with open(output_path, "r") as f: + json_content = json.load(f) + + citations = json_content.get(constants.CAT_CITATION, []) + + # We omit 'is_preferred_citation: False'. + # we use just the flag is_preferred_citation: True to identify the preferred citation. + software_entry = next( + (cit for cit in citations if not cit["result"].get("is_preferred_citation") and + cit["result"].get("type") == "SoftwareApplication"), + None + ) + preferred_entry = next( + (cit for cit in citations if str(cit["result"].get("is_preferred_citation")) == "True"), + None + ) + + assert software_entry is not None, "Software citation (root) not found" + sw_result = software_entry["result"] + assert sw_result["title"] == 'SOMEF: Software metadata extraction framework' + assert sw_result["version"] == "0.1.0" + assert "doi" not in sw_result or sw_result.get("doi") is None # it is in preferred (referencePublication) but not in the root + + assert preferred_entry is not None, "Preferred citation (article) not found" + pref_result = preferred_entry["result"] + assert pref_result["title"] == "A Framework for Creating Knowledge Graphs of Scientific Software Metadata" + assert pref_result["doi"] == "10.1162/qss_a_00167" + assert pref_result["journal"] == "Quantitative Science Studies" + assert "version" not in pref_result # it is in the root in citation but not in the preferred (referencePublication) + + os.remove(test_data_path + "test_new_properties_citation_issue_935.json") + + @unittest.skipIf(os.getenv("CI") == "true", "Skipped in CI because it is already verified locally") def test_issue_gitlab_enrich_authors(self): """Tests if a gitlab 
repository with codeowners file gets enriched with the information of the users in the codeowners file. @@ -714,9 +770,52 @@ def test_issue_886_apache(self): data = text_file.read() text_file.close() json_content = json.loads(data) - copyright_entries = json_content[constants.CAT_COPYRIGHT] copy = copyright_entries[0]["result"] assert copy["value"] == "Daniel Garijo, Information Sciences Institute, USC." assert copy["year"] == "2016" - os.remove(test_data_path + "test_issue_886_apache.json") \ No newline at end of file + os.remove(test_data_path + "test_issue_886_apache.json") + + + def test_issue_955_license_consolidation(self): + """Checks whether licenses are correctly consolidated and enriched with SPDX metadata""" + output_path = test_data_path + "test_issue_955_license_consolidation.json" + + somef_cli.run_cli(threshold=0.8, + ignore_classifiers=False, + repo_url=None, + local_repo=test_data_repositories + "Widoco", + doc_src=None, + in_file=None, + output=output_path, + graph_out=None, + graph_format="turtle", + codemeta_out=None, + pretty=True, + missing=False, + readme_only=False) + + with open(output_path, "r") as text_file: + json_content = json.loads(text_file.read()) + + assert constants.CAT_LICENSE in json_content + license_entries = json_content[constants.CAT_LICENSE] + + assert len(license_entries) == 1 + + license_res = license_entries[0]["result"] + + assert license_res["value"] == "Apache-2.0" + assert license_res["spdx_id"] == "Apache-2.0" + assert license_res["name"] == "Apache License 2.0" + assert license_res["url"] == "https://spdx.org/licenses/Apache-2.0" + assert license_res["identifier"] == "https://spdx.org/licenses/Apache-2.0" + + assert isinstance(license_entries[0]["technique"], list) + assert "file_exploration" in license_entries[0]["technique"] + assert "code_parser" in license_entries[0]["technique"] + + assert isinstance(license_entries[0]["source"], list) + assert len(license_entries[0]["source"]) >= 2 + + os.remove(output_path) 
\ No newline at end of file diff --git a/src/somef/test/test_cli.py b/src/somef/test/test_cli.py index c2b68105..bd291013 100644 --- a/src/somef/test/test_cli.py +++ b/src/somef/test/test_cli.py @@ -961,7 +961,7 @@ def test_categorization(self): data = text_file.read() text_file.close() json_content = json.loads(data) - repo_status = json_content[constants.CAT_TYPE][0] + repo_status = json_content[constants.CAT_APPLICATION_TYPE][0] print(repo_status) repo_type = repo_status[constants.PROP_RESULT][constants.PROP_VALUE] print(repo_type) @@ -990,6 +990,6 @@ def test_redundant_files(self): data = text_file.read() text_file.close() json_content = json.loads(data) - t = json_content[constants.CAT_TYPE][0] + t = json_content[constants.CAT_APPLICATION_TYPE][0] assert t[constants.PROP_RESULT][constants.PROP_VALUE] == "ontology" os.remove(test_data_path + "test-ecfo.json") diff --git a/src/somef/test/test_codemeta_export.py b/src/somef/test/test_codemeta_export.py index 84d1a7f6..30369661 100644 --- a/src/somef/test/test_codemeta_export.py +++ b/src/somef/test/test_codemeta_export.py @@ -642,7 +642,6 @@ def test_issue_936_contributors(self): json_content = json.loads(data) contributors = json_content[constants.CAT_CODEMETA_CONTRIBUTOR] - print(contributors) self.assertTrue(any( c["name"] == "Abby Cabunoc Mayes" and c.get("givenName") == "Abby Cabunoc" @@ -667,6 +666,46 @@ def test_issue_936_contributors(self): os.remove(test_data_path + "test_issue_936_contributors.json") + + def test_issue_960_funding(self): + """Checks whether funding and funder information are correctly extracted and exported to CodeMeta""" + output_path = test_data_path + "test_issue_960_funding.json" + + somef_cli.run_cli(threshold=0.8, + ignore_classifiers=False, + repo_url=None, + local_repo=test_data_repositories + "codemeta_repo", + doc_src=None, + in_file=None, + output=None, + graph_out=None, + graph_format="turtle", + codemeta_out=output_path, + pretty=True, + missing=False, + readme_only=False) 
+ + text_file = open(output_path, "r") + data = text_file.read() + text_file.close() + json_content = json.loads(data) + + expected_funding = "1549758; Codemeta: A Rosetta Stone for Metadata in Scientific Software" + self.assertEqual(json_content.get("funding"), expected_funding, + f"Expected funding '{expected_funding}' not found in exported CodeMeta") + + funder = json_content.get("funder") + self.assertIsNotNone(funder, "Funder field missing in exported CodeMeta") + + if isinstance(funder, dict): + self.assertEqual(funder.get("name"), "National Science Foundation", "Funder name mismatch") + self.assertEqual(funder.get("@id"), "https://doi.org/10.13039/100000001", "Funder @id mismatch") + else: + self.assertEqual(funder, "National Science Foundation", "Funder name mismatch") + + os.remove(output_path) + + @classmethod def tearDownClass(cls): """delete temp file JSON just if all the test pass""" diff --git a/src/somef/test/test_codemeta_parser.py b/src/somef/test/test_codemeta_parser.py index de36c783..c9c9f769 100644 --- a/src/somef/test/test_codemeta_parser.py +++ b/src/somef/test/test_codemeta_parser.py @@ -46,6 +46,8 @@ def test_parse_multiple_codemeta_files(self): for cat_name, expected_val in expected.items(): cat_const = getattr(constants, cat_name) actual_list = metadata_result.results.get(cat_const, []) + # print(f"Actual list for {cat_name}: {actual_list}") + self.assertTrue( actual_list, f"[{repo_folder}] No results for {cat_name}" @@ -93,5 +95,28 @@ def test_parse_contributors(self): )) + def test_parse_reference_publications_authors_issue_957(self): + """ + Test to ensure that authors in the citation category correctly use the 'given_name' and + 'family_name' properties instead of the old camelCase convention. 
+ """ + codemeta_path = REPOS_DIR / "Widoco" / "codemeta.json" + result = Result() + + metadata_result = parse_codemeta_json_file(codemeta_path, result, "https://example.org/codemeta.json") + + self.assertIn(constants.CAT_CITATION, metadata_result.results) + citations = result.results[constants.CAT_CITATION] + found = False + + for cit in citations: + authors = cit["result"].get("authors", []) + if any(a.get("name") == "Daniel Garijo" and a.get("family_name") == "Garijo" and a.get("given_name") == "Daniel" for a in authors): + found = True + break + + self.assertTrue(found, "Author 'Daniel Garijo' with 'given_name' not found in citation authors") + + if __name__ == "__main__": unittest.main() diff --git a/src/somef/test/test_data/expected/Widoco.yaml b/src/somef/test/test_data/expected/Widoco.yaml index a7465b3b..747483bb 100644 --- a/src/somef/test/test_data/expected/Widoco.yaml +++ b/src/somef/test/test_data/expected/Widoco.yaml @@ -23,7 +23,7 @@ CAT_DESCRIPTION: # Passed - "WIDOCO helps you to publish and create an enriched and customized documentation of your ontology, by following a series of steps in a wizard. We extend the LODE framework by Silvio Peroni to describe the classes, properties and data properties of the ontology, the OOPS! webservice by María Poveda to print an evaluation and the Licensius service by Victor Rodriguez Doncel to determine the license URI and title being used. In addition, we use WebVowl to visualize the ontology and have extended Bubastis to show a complete changelog between different versions of your ontology.\n\nFeatures of WIDOCO:\n* Automatic documentation of the terms in your ontology (based on [LODE](http://www.essepuntato.it/lode/)). Now **you can use Markdown on your class descriptions** (see [example](https://dgarijo.github.io/Widoco/doc/gallery/index.html))\n* Massive metadata extraction and support: WIDOCO will enhance your ontology documentation based on your ontology annotations. 
Now you can add custom logos and images, edit the content of your sections, etc. by just editing metadata. See our [supported metadata](doc/metadataGuide/guide.md) and [recommendations](https://dgarijo.github.io/Widoco/doc/bestPractices/index-en.html) for more information.\n* Automatic annotation in JSON-LD snippets of the html produced.\n* Association of a provenance page which includes the history of your vocabulary (W3C PROV-O compliant).\n* Guidelines on the main sections that your document should have and how to complete them.\n* Integration with diagram creators ([WebVOWL](http://vowl.visualdataweb.org/webvowl/)).\n* Automatic changelog of differences between the actual and the previous version of the ontology (based on [Bubastis](http://www.ebi.ac.uk/efo/bubastis/)).\n* Separation of the sections of your html page so you can write them independently and replace only those needed.\n* Content negotiation and serialization of your ontology according to [W3C best practices](https://www.w3.org/TR/swbp-vocab-pub/)\n* Evaluation reports of your ontology (using the [OOPS! 
web service](https://oops.linkeddata.es/))\n* Integration with license metadata services ([Licensius](http://licensius.com/)) to automatically describe the license used in your ontology.\n" # Passed CAT_CITATION: # Passed title: "WIDOCO: a wizard for documenting ontologies" - type: Scholarly_article + type: ScholarlyArticle url: http://dgarijo.com/papers/widoco-iswc2017.pdf date_published: "2017" doi: "10.1007/978-3-319-68204-4_9" diff --git a/src/somef/test/test_data/expected/gammapy.yaml b/src/somef/test/test_data/expected/gammapy.yaml index b3953f03..cff9059c 100644 --- a/src/somef/test/test_data/expected/gammapy.yaml +++ b/src/somef/test/test_data/expected/gammapy.yaml @@ -24,7 +24,7 @@ CAT_REQUIREMENTS: # Passed value: numpy>=1.21 name: numpy version: ">=1.21" - type: Software_application + type: SoftwareDependency CAT_AUTHORS: # Passed value: Fabio Acero diff --git a/src/somef/test/test_data/repositories/somef_repo/CITATION.cff b/src/somef/test/test_data/repositories/somef_repo/CITATION.cff new file mode 100644 index 00000000..ee3ba814 --- /dev/null +++ b/src/somef/test/test_data/repositories/somef_repo/CITATION.cff @@ -0,0 +1,43 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: 'SOMEF: Software metadata extraction framework' +message: >- + If you use this software, please cite both the article + from preferred-citation and the software itself. 
+type: software +authors: + - family-names: Garijo + given-names: Daniel + orcid: 'https://orcid.org/0000-0003-0454-7145' + - family-names: Mao + given-names: Allen + - family-names: Dharmala + given-names: Haripriya + - family-names: Diwanji + given-names: Cedant + - family-names: Wang + given-names: Jiajing + - family-names: Kelley + given-names: Aidan + - family-names: García + given-names: Miguel Angel + - family-names: Ciuciu-Kiss + given-names: Jenifer + - family-names: Mendoza + given-names: Juanje +license: Apache-2.0 +version: 0.1.0 +preferred-citation: + authors: + - family-names: Kelley + given-names: Aidan + - family-names: Garijo + given-names: Daniel + title: A Framework for Creating Knowledge Graphs of Scientific Software Metadata + type: article + journal: Quantitative Science Studies + pages: 1-37 + year: 2021 + doi: 10.1162/qss_a_00167 \ No newline at end of file diff --git a/src/somef/test/test_process_repository.py b/src/somef/test/test_process_repository.py index e6b552da..2efa9581 100644 --- a/src/somef/test/test_process_repository.py +++ b/src/somef/test/test_process_repository.py @@ -190,9 +190,8 @@ def test_issue_526(self): github_data = Result() text, github_data = process_files.process_repository_files(test_data_repositories + "Widoco", github_data, constants.RepositoryType.LOCAL) - # after solving issue refernce_publication it must be 2 citations in results citation. - # assert len(github_data.results[constants.CAT_CITATION]) == 1 - assert len(github_data.results[constants.CAT_CITATION]) == 2 + # after solving issue refernce_publication it must be 3 citation. 
1 should the preferred one from the cff file, + assert len(github_data.results[constants.CAT_CITATION]) == 3 def test_issue_530(self): """ @@ -204,8 +203,10 @@ def test_issue_530(self): constants.RepositoryType.LOCAL) licenses = github_data.results[constants.CAT_LICENSE] citation = github_data.results[constants.CAT_CITATION] + # there are two licenses because the codemeta parser obtains one - assert len(licenses) == 2 and "LICENSE" or "codemeta" in licenses[0]["source"] and \ + # after extracting the license from citation.cff now we should have 3 + assert len(licenses) == 3 and "LICENSE" or "codemeta" in licenses[0]["source"] and \ len(citation) == 1 and "example_onto" not in citation[0]["source"] def test_issue_611(self): @@ -333,7 +334,6 @@ def test_issue_905_tag(self): assert os.path.exists(test_data_path + "test_905_tag.json") version = json_content.get(constants.CAT_VERSION, []) - print(version) source = version[0].get("source", "") assert "Widoco/v1.4.25" in source, f"The downloaded tag does not match the requested one. 
Source: {source}" diff --git a/src/somef/test/test_toml_parser.py b/src/somef/test/test_toml_parser.py index 5cbae680..5de55849 100644 --- a/src/somef/test/test_toml_parser.py +++ b/src/somef/test/test_toml_parser.py @@ -152,7 +152,7 @@ def test_parse_pluto_project_toml(self): self.assertIn("REPL", dep_values) for req in requirements_results: - self.assertEqual(req["result"]["type"], constants.SOFTWARE_APPLICATION) + self.assertEqual(req["result"]["type"], constants.SOFTWARE_DEPENDENCY) self.assertEqual(req["technique"], constants.TECHNIQUE_CODE_CONFIG_PARSER) runtime_results = metadata_result.results.get(constants.CAT_RUNTIME_PLATFORM, []) diff --git a/src/somef/utils/constants.py b/src/somef/utils/constants.py index 4d2c476e..af200639 100644 --- a/src/somef/utils/constants.py +++ b/src/somef/utils/constants.py @@ -64,7 +64,8 @@ # REGEXP_APACHE = r'(?i)apache\s+license\s*,?\s*version\s*2\.0' REGEXP_APACHE = r'(?i)apache(?:\s+license)?\s*(?:,?\s*version\s*)?2\.0' REGEXP_GPL3 = r'(?i)gnu\s+general\s+public\s+license\s*,?\s*version\s*3\.0' -REGEXP_MIT = r'(?i)mit\s+license' +# REGEXP_MIT = r'(?i)mit\s+license' +REGEXP_MIT = r'(?i)(mit\s+license|permission\s+is\s+hereby\s+granted|THE\s+SOFTWARE\s+IS\s+PROVIDED\s+"AS\s+IS")' REGEXP_BSD2 = r'(?i)(bsd\s*-?\s*2-?clause(?:\s*license)?|redistribution\s+and\s+use\s+in\s+source\s+and\s+binary\s+forms)' REGEXP_BSD3 = r'(?i)bsd\s+3-clause\s+license' REGEXP_BOOST = r'(?i)boost\s+software\s+license\s*,?\s*version\s*1\.0' @@ -158,6 +159,7 @@ """ CAT_PROGRAMMING_LANGUAGES = "programming_languages" CAT_README_URL = "readme_url" +# CAT_REFERENCE_PUBLICATION = "reference_publication" CAT_RELATED_DOCUMENTATION = "related_documentation" CAT_RELATED_PAPERS = "related_papers" CAT_RELEASES = "releases" @@ -170,7 +172,7 @@ CAT_SUPPORT_CHANNELS = "support_channels" CAT_USAGE = "usage" CAT_WORKFLOWS = "workflows" -CAT_TYPE = "type" +CAT_APPLICATION_TYPE = "application_type" # former CAT_TYPE CAT_PACKAGE_ID = "package_id" CAT_HAS_PACKAGE_FILE 
= "has_package_file" CAT_VERSION = "version" @@ -196,7 +198,7 @@ CAT_OWNER, CAT_PACKAGE_DISTRIBUTION, CAT_HAS_PACKAGE_FILE, CAT_PROGRAMMING_LANGUAGES, CAT_README_URL, CAT_RELATED_DOCUMENTATION, CAT_RELEASES, CAT_RUN, CAT_RUNTIME_PLATFORM, CAT_RELATED_PAPERS, CAT_STATUS, CAT_REQUIREMENTS, CAT_STARS, CAT_SUPPORT, CAT_SUPPORT_CHANNELS, CAT_USAGE, - CAT_WORKFLOWS, CAT_TYPE] + CAT_WORKFLOWS, CAT_APPLICATION_TYPE] # All properties used by SOMEF to label the output JSON # Provenance: @@ -230,12 +232,19 @@ PROP_DEPENDENCY_RESOLVER = "dependency_resolver" PROP_EMAIL = "email" PROP_GIVEN_NAME = "given_name" +PROP_FAMILY_NAME = "family_name" +PROP_FUNDER = "funder" +PROP_FUNDING = "funding" PROP_HTML_URL = "html_url" PROP_IDENTIFIER = "identifier" +PROP_JOURNAL = "journal" PROP_LAST_NAME = "last_name" +PROP_LICENSE = "license" PROP_NAME = "name" PROP_ORIGINAL_HEADER = "original_header" +PROP_PAGES = "pages" PROP_PARENT_HEADER = "parent_header" +PROP_PREFERRED_CITATION = "is_preferred_citation" PROP_RELEASE_ID = "release_id" PROP_ROLE = "role" PROP_SIZE = "size" @@ -284,10 +293,11 @@ AGENT = "Agent" RELEASE = "Release" LICENSE = "License" -PUBLICATION = "Publication" +# PUBLICATION = "Publication" LANGUAGE = "Programming_language" -SOFTWARE_APPLICATION = "Software_application" -SCHOLARLY_ARTICLE = "Scholarly_article" +SOFTWARE_APPLICATION = "SoftwareApplication" +SOFTWARE_DEPENDENCY = "SoftwareDependency" +SCHOLARLY_ARTICLE = "ScholarlyArticle" # Different techniques TECHNIQUE_SUPERVISED_CLASSIFICATION = "supervised_classification" @@ -436,6 +446,8 @@ class RepositoryType(Enum): CAT_CODEMETA_DESCRIPTION = "description" CAT_CODEMETA_DEVELOPMENTSTATUS = "developmentStatus" CAT_CODEMETA_DOWNLOADURL = "downloadUrl" +CAT_CODEMETA_FUNDER = "funder" +CAT_CODEMETA_FUNDING = "funding" CAT_CODEMETA_ISSUETRACKER = "issueTracker" CAT_CODEMETA_IDENTIFIER = "identifier" CAT_CODEMETA_KEYWORDS = "keywords"