Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,43 @@
version: 2

multi-ecosystem-groups:
build:
schedule:
interval: "monthly"

updates:
- package-ecosystem: "gradle"
directory: "./" # Hack to allow multiple definitions of Gradle
patterns:
- "gradle-wrapper"
- "com.bakdata.release"
- "com.bakdata.sonar"
- "com.bakdata.jib"
- "com.bakdata.avro"
- "com.bakdata.mockito"
# freefair plugins follow Gradle's versioning
- "io.freefair*"
cooldown:
default-days: 7
exclude:
- "com.bakdata*"
multi-ecosystem-group: "build"
- package-ecosystem: "gradle"
directory: "/"
schedule:
interval: "monthly"
cooldown:
default-days: 7
exclude:
- "com.bakdata*"
ignore: # Should match the patterns that are selected for "build" multi-ecosystem Gradle updates
- dependency-name: "gradle-wrapper"
- dependency-name: "com.bakdata.release"
- dependency-name: "com.bakdata.sonar"
- dependency-name: "com.bakdata.jib"
- dependency-name: "com.bakdata.avro"
- dependency-name: "com.bakdata.mockito"
- dependency-name: "io.freefair*"
groups:
log-dependencies:
patterns:
Expand All @@ -26,10 +60,25 @@ updates:
patterns:
- "io.freefair*"

- package-ecosystem: "github-actions"
directory: "./" # Hack to allow multiple definitions of GitHub Actions
patterns:
- "bakdata/ci-templates*"
cooldown:
default-days: 7
exclude:
- "bakdata/ci-templates*"
multi-ecosystem-group: "build"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
cooldown:
default-days: 7
exclude:
- "bakdata/ci-templates*"
ignore: # Should match the patterns that are selected for "build" multi-ecosystem GitHub Actions updates
- dependency-name: "bakdata/ci-templates*"
groups:
ci-templates:
patterns:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-and-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
branches:
- master
pull_request:
merge_group:

jobs:
build-and-publish:
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[![Build Status](https://dev.azure.com/bakdata/public/_apis/build/status/bakdata.dedupe?branchName=master)](https://dev.azure.com/bakdata/public/_build/latest?definitionId=4&branchName=master)
[![Build and Publish](https://github.com/bakdata/dedupe/actions/workflows/build-and-publish.yaml/badge.svg)](https://github.com/bakdata/dedupe/actions/workflows/build-and-publish.yaml)
[![Sonarcloud status](https://sonarcloud.io/api/project_badges/measure?project=com.bakdata.dedupe%3Adedupe&metric=alert_status)](https://sonarcloud.io/dashboard?id=com.bakdata.dedupe%3Adedupe)
[![Code coverage](https://sonarcloud.io/api/project_badges/measure?project=com.bakdata.dedupe%3Adedupe&metric=coverage)](https://sonarcloud.io/dashboard?id=com.bakdata.dedupe%3Adedupe)
[![Maven](https://img.shields.io/maven-central/v/com.bakdata.dedupe/core.svg)](https://search.maven.org/search?q=g:com.bakdata.dedupe&core=gav)
Expand Down Expand Up @@ -56,8 +56,8 @@ OnlineDeduplication<Person> deduplication =
// apply it to a list of customers
List<Person> customers = ...;
for(Person customer: customers) {
final Person fusedPerson = deduplication.deduplicate(customer);
// store fused person
final Person fusedPerson = deduplication.deduplicate(customer);
// store fused person
}
```

Expand Down Expand Up @@ -89,7 +89,7 @@ OnlineCandidateSelection<Person> candidateSelection = OnlineSortedNeighborhoodMe
person -> CompositeValue.of(person.getBirthDate(), normalizeName(person.getLastName()))))
.build();

private static String normalizeName(String value) {
private static String normalizeName(final String value) {
// split umlauts into canonicals
return java.text.Normalizer.normalize(value.toLowerCase(), java.text.Normalizer.Form.NFD)
// remove everything in braces
Expand All @@ -113,10 +113,10 @@ Classifier<Person> personClassifier = RuleBasedClassifier.<Person>builder()
.negativeRule("Different social security number", inequality().of(Person::getSSN))
.positiveRule("Default", CommonSimilarityMeasures.<Person>weightedAverage()
.add(10, Person::getSSN, equality())
.add(2, Person::getFirstName, max(levenshtein().cutoff(.5f), jaroWinkler()))
.add(2, Person::getFirstName, max(levenshtein().cutoff(0.5f), jaroWinkler()))
.add(2, Person::getLastName, max(equality().of(beiderMorse()), jaroWinkler()))
.build()
.scaleWithThreshold(.9f))
.scaleWithThreshold(0.9f))
.build();
```

Expand Down Expand Up @@ -168,7 +168,7 @@ ConflictResolution<Person, Person> personMerge = ConflictResolutions.merge(Perso
Fusion<Person> personFusion = ConflictResolutionFusion.<Person>builder()
.sourceExtractor(Person::getSource)
.lastModifiedExtractor(Person::getLastModified)
.rootResolution(personMerge)
.rootResolution(personMerge)
.build();
```

Expand Down
Loading