Settings

Settings

class oagdedupe.settings.Settings(_env_file: Optional[Union[str, PathLike, List[Union[str, PathLike]], Tuple[Union[str, PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, PathLike]] = None, *, attributes: list = ['name', 'addr'], name: str = 'default', folder: Path = PosixPath('.dedupe'), model: SettingsModel = SettingsModel(dedupe=True, n=5000, k=3, max_compare=1000000, n_covered=500000, cpus=1, path_model=PosixPath('.dedupe/model')), db: Optional[SettingsDB] = SettingsDB(path_database='postgresql+psycopg2://username:password@0.0.0.0:8000/db', db_schema='dedupe'), label_studio: SettingsLabelStudio = SettingsLabelStudio(host='http://0.0.0.0', port=8090, api_key='please provide an api key', description='entity resolution'), fast_api: SettingsService = SettingsService(host='http://0.0.0.0', port=8090))[source]

project settings

class Config[source]
env_file = '.env'
env_nested_delimiter = '__'
env_prefix = 'oagdedupe_'
_abc_impl = <_abc_data object>
attributes: list

name of the project, a unique identifier

property compare_cols: List[str]

gets comparison columns with “_l” and “_r” suffixes

Return type

List[str]

Examples

>>> self.settings.attributes = ["name", "address"]
>>> self.settings.compare_cols
[
    "name_l", "address_l", "name_r", "address_r",
    "_index_l", "_index_r"
]
db: Optional[SettingsDB]

label studio settings

fast_api: SettingsService
folder: Path

model settings

label_studio: SettingsLabelStudio

fast api settings

model: SettingsModel

other project settings

name: str

path to folder to store the config file

class oagdedupe.settings.SettingsDB(*, path_database: str = 'postgresql+psycopg2://username:password@0.0.0.0:8000/db', db_schema: str = 'dedupe')[source]

Other project settings

_abc_impl = <_abc_data object>
property db
db_schema: str
path_database: str

database schema

class oagdedupe.settings.SettingsLabelStudio(*, host: str = 'http://0.0.0.0', port: int = 8090, api_key: str = 'please provide an api key', description: str = 'entity resolution')[source]
_abc_impl = <_abc_data object>
api_key: str

project description

description: str
class oagdedupe.settings.SettingsModel(*, dedupe: bool = True, n: int = 5000, k: int = 3, max_compare: int = 1000000, n_covered: int = 500000, cpus: int = 1, path_model: Path = PosixPath('.dedupe/model'))[source]

dedupe vs record-linkage

_abc_impl = <_abc_data object>
cpus: int

path to model

dedupe: bool

block learner sample size (per learning loop)

k: int

maximum number of comparisons

max_compare: int

maximum number of comparisons

n: int

maximum length of blocking scheme conjunctions

n_covered: int

number of cpus to use

path_model: Path
class oagdedupe.settings.SettingsService(*, host: str = 'http://0.0.0.0', port: int = 8090)[source]

settings for a service

_abc_impl = <_abc_data object>
host: str
port: int
property url: str