Reference

arakawa_points

Point

Class that represents the Arakawa points (i.e. center point, face, edge, etc).

Source code in xnemogcm/arakawa_points.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class Point:
    """
    An Arakawa grid point (i.e. center point, face, edge, etc).

    After construction the instance carries *point_type* and the names of the
    dimensions the point lives on: *x* ('x_c' or 'x_f'), *y* ('y_c' or 'y_f')
    and *z* ('z_c' or 'z_f').
    """

    def __init__(self, point_type):
        """
        point_type : 'T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW'

        Raises ValueError if *point_type* is not one of ALL_POINTS.
        """
        if point_type not in ALL_POINTS:
            raise ValueError(
                f"*point_type* must be in ['T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW'] \n   We got point_type={point_type}"
            )
        self.point_type = point_type
        for resolve in (self.get_x, self.get_y, self.get_z):
            resolve()

    def get_x(self):
        """Set *self.x* to 'x_c' (cell center) or 'x_f' (cell face) from the point type."""
        kind = self.point_type
        if kind in ("T", "V", "W", "VW"):
            self.x = "x_c"
        elif kind in ("U", "F", "UW", "FW"):
            self.x = "x_f"

    def get_y(self):
        """Set *self.y* to 'y_c' (cell center) or 'y_f' from the point type."""
        kind = self.point_type
        if kind in ("T", "U", "W", "UW"):
            self.y = "y_c"
        elif kind in ("V", "F", "VW", "FW"):
            self.y = "y_f"

    def get_z(self):
        """Set *self.z* to 'z_c' or 'z_f' from the point type."""
        kind = self.point_type
        if kind in ("T", "U", "V", "F"):
            self.z = "z_c"
        elif kind in ("W", "UW", "VW", "FW"):
            self.z = "z_f"

__init__(point_type)

point_type : 'T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW'

Source code in xnemogcm/arakawa_points.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def __init__(self, point_type):
    """
    Validate *point_type*, store it, and set the dimension names of the point.

    point_type : 'T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW'

    Raises ValueError if *point_type* is not in ALL_POINTS.
    """
    is_known = point_type in ALL_POINTS
    if not is_known:
        message = f"*point_type* must be in ['T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW'] \n   We got point_type={point_type}"
        raise ValueError(message)
    self.point_type = point_type
    # Derive the dimension names along each direction from the point type
    self.get_x()
    self.get_y()
    self.get_z()

domcfg

domcfg_preprocess(ds)

Preprocess domcfg / mesh_mask files when they need to be recombined (= 1 file per processor)

Source code in xnemogcm/domcfg.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def domcfg_preprocess(ds):
    """
    Prepare one domcfg / mesh_mask dataset before it is recombined with the others
    (= 1 file per processor).

    Swaps the 'z' dimension for 'nav_lev' when 'z' is present, makes 'x' and 'y'
    coordinates (shifted by the 'DOMAIN_position_first' attribute when the dataset
    has one) and makes sure 'nav_lev' is a coordinate if it exists in the dataset.

    Returns the preprocessed dataset.
    """
    # nemo 3.6
    has_z = "z" in ds or "z" in ds.dims or "z" in ds.coords
    if has_z:
        ds = ds.swap_dims({"z": "nav_lev"})
    if "DOMAIN_position_first" not in ds.attrs:
        # This means that we are not merging multiple outputs from processors but e.g. a domain_cfg and a mesh_mask
        ds.coords["x"] = ds.x
        ds.coords["y"] = ds.y
    else:
        # Offset the local indices by the position of this sub-domain
        x0, y0 = ds.attrs["DOMAIN_position_first"]
        shifted = {"x": ds.x + x0 - 1, "y": ds.y + y0 - 1}
        ds = ds.assign_coords(shifted)
    # We need to add "nav_lev" in the coordinates if not present
    if "nav_lev" in ds and "nav_lev" not in ds.coords:
        ds.coords["nav_lev"] = ds["nav_lev"]
    return ds

open_domain_cfg(datadir=None, files=None, add_coordinates=True)

Return a dataset containing all dataarrays of the domain_cfg*.nc / mesh_mask files.

To do so, all the datasets are opened and merged. The resulting dataset is compatible with xgcm; the corresponding grid can be created with: xgcm.Grid(domcfg)

Parameters:
  • datadir (string or Path or None, default: None ) –

    The directory containing the 'domain_cfg' or 'mesh_mask' files

  • files (list or iterator or None, default: None ) –

    List of the file names that correspond to the domain_cfg and/or mesh_mask files, e.g. files=Path('path/to/data').glob('*my_domcfg*.nc') if your domain_cfg files are called 'something_my_domcfg_00.nc' and 'something_my_domcfg_01.nc'

  • add_coordinates (bool, default: True ) –

    Whether to add the 'glamt', 'gphit', etc as coordinates of the dataset

Returns:
  • domcfg( Dataset ) –

    The domain configuration dataset, can be read by xgcm.

Source code in xnemogcm/domcfg.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def open_domain_cfg(datadir=None, files=None, add_coordinates=True):
    """
    Return a dataset containing all dataarrays of the domain_cfg*.nc / mesh_mask files.

    To do so, all the datasets are opened and merged.
    The resulting dataset is compatible with xgcm, the corresponding grid
    can be created with: xgcm.Grid(domcfg)

    Parameters
    ----------
    datadir : string or pathlib.Path or None
        The directory containing the 'domain_cfg' or 'mesh_mask' files
    files : list or iterator or None
        list of the file names that correspond to the domain_cfg and/or mesh_mask files,
        e.g. 'files=Path('path/to/data').glob('*my_domcfg*.nc') if your domain_cfg files are called
        'something_my_domcfg_00.nc' and 'something_my_domcfg_01.nc'
    add_coordinates : bool
        Whether to add the 'glamt', 'gphit', etc as coordinates of the dataset

    Returns
    -------
    domcfg : xarray.Dataset
        The domain configuration dataset, can be read by xgcm.

    Raises
    ------
    FileNotFoundError
        If no 'domain_cfg' or 'mesh_mask' file is found / provided.
    """
    # TODO see dask arrays (chunk argument in xr.open_dataset)
    files = _dir_or_files_to_files(
        datadir, files, patterns=["*domain_cfg*.nc", "*mesh_mask*.nc"]
    )
    # Nothing to open: fail early with an explicit message
    if not files:
        raise FileNotFoundError("No 'domain_cfg' or 'mesh_mask' files are provided")
    # Open (and merge, if several) the files into a single dataset
    domcfg = open_file_multi(files=files)
    #
    # This part is used to put the vars on the right point of the grid (e.g. T, U, V points)
    domcfg_points = get_domcfg_points()
    # Replacing the name of the coordinates
    for i in domcfg.keys():
        # Variables that are not listed in the hard-coded table are left untouched
        if i not in domcfg_points.keys():
            continue
        # A None entry in the table means the variable is not attached to a grid point
        if domcfg_points[i] is not None:
            point = akp.Point(domcfg_points[i])
            if "x" in domcfg[i].coords:
                domcfg[i] = domcfg[i].rename({"x": point.x})
            if "y" in domcfg[i].coords:
                domcfg[i] = domcfg[i].rename({"y": point.y})
            if "nav_lev" in domcfg[i].coords:
                domcfg[i] = domcfg[i].rename({"nav_lev": point.z})
    # Face positions are defined relative to the centers: +0.5 along x and y,
    # -0.5 along z (z_c is first replaced by a plain 0..N-1 index)
    domcfg["x_f"] = domcfg["x_c"] + 0.5
    domcfg["y_f"] = domcfg["y_c"] + 0.5
    domcfg = domcfg.assign_coords(z_c=np.arange(len(domcfg["z_c"])))
    domcfg["z_f"] = domcfg["z_c"] - 0.5
    # Re-declare each coordinate with the 'axis' / 'c_grid_axis_shift' attributes
    domcfg.coords["x_c"] = (
        [
            "x_c",
        ],
        domcfg.coords["x_c"].data,
        {"axis": "X"},
    )  # center point
    domcfg.coords["x_f"] = (
        [
            "x_f",
        ],
        domcfg.coords["x_f"].data,
        {"axis": "X", "c_grid_axis_shift": 0.5},
    )  # right  point
    domcfg.coords["y_c"] = (
        [
            "y_c",
        ],
        domcfg.coords["y_c"].data,
        {"axis": "Y"},
    )  # center point
    domcfg.coords["y_f"] = (
        [
            "y_f",
        ],
        domcfg.coords["y_f"].data,
        {"axis": "Y", "c_grid_axis_shift": 0.5},
    )  # right  point
    #
    domcfg.coords["z_c"] = (
        [
            "z_c",
        ],
        domcfg.coords["z_c"].data,
        {"axis": "Z"},
    )  # center point
    domcfg.coords["z_f"] = (
        [
            "z_f",
        ],
        domcfg.coords["z_f"].data,
        {"axis": "Z", "c_grid_axis_shift": -0.5},
    )  # left   point
    # Cleaning unused coordinates
    coordinates = [
        key for key in domcfg.coords.keys()
    ]  # all coordinates, including unused ones
    # Remove from the list every coordinate that is a dimension of at least one data variable
    for var in domcfg.data_vars:
        for coord in domcfg[var].dims:
            if coord in coordinates:
                coordinates.pop(coordinates.index(coord))
    # coordinates now contains unused coordinates
    for coord in coordinates:
        domcfg = domcfg.drop_dims(coord, errors="ignore").drop_vars(
            coord, errors="ignore"
        )
    # adding variables as coordinates
    if add_coordinates:
        domcfg = _add_coordinates(domcfg)
    # Remove nav_lon and nav_lat
    domcfg = domcfg.drop_vars(["nav_lon", "nav_lat"], errors="ignore")
    # Add cf
    domcfg = _add_cf(domcfg)
    return domcfg

open_file_multi(files)

Open and merge the netCDF files created on each processor by NEMO (e.g. domain_cfg or mesh_mask). If only one file is present, it is simply opened and returned.

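Note: the function takes a single argument, files, the list of file names to open. The alternative calling method mentioned in the docstring (a directory pathdir plus a file prefix file_prefix, e.g. 'domain_cfg') is not part of this signature.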

Source code in xnemogcm/domcfg.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def open_file_multi(files):
    """
    Open and merge the netcdf files created on each processor by NEMO
    (e.g. domain_cfg or mesh_mask).

    Every file goes through *domcfg_preprocess* before being combined.
    Singleton time dimensions ('time_counter', 't') are squeezed out and the
    per-processor 'DOMAIN_*' global attributes are removed from the result.

    Parameters
    ----------
    files : list
        the file names to open, given to xarray.open_mfdataset

    Returns
    -------
    xarray.Dataset, the merged dataset
    """
    # data_vars='minimal' necessary to not add x and y dimensions into dimensionless variables
    # see https://github.com/pydata/xarray/issues/2064
    # drop_variables necessary for nemo 4.2, because x and y are both dimensions and variables
    open_options = dict(
        preprocess=domcfg_preprocess,
        combine_attrs="drop_conflicts",
        data_vars="minimal",
        drop_variables=["x", "y"],
    )
    merged = xr.open_mfdataset(files, **open_options)

    for time_dim in ("time_counter", "t"):
        if time_dim in merged.dims:
            merged = merged.squeeze(time_dim)

    # Attributes describing one processor sub-domain are removed from the merged dataset
    per_processor_attrs = (
        "DOMAIN_position_first",
        "DOMAIN_position_last",
        "DOMAIN_number",
        "DOMAIN_number_total",
        "DOMAIN_size_local",
    )
    for attr_name in per_processor_attrs:
        merged.attrs.pop(attr_name, None)

    return merged

merge

open_nemo_and_domain_cfg(nemo_files=None, domcfg_files=None, nemo_kwargs=None, domcfg_kwargs=None, linear_free_surface=False)

Open nemo_ds and domcfg with open_nemo and open_domain_cfg and merge them with _merge_nemo_and_domain_cfg.

See the respective functions docstrings for more details.

2 methods are available for nemo files and domain_cfg/mesh_mask files: 1) provide a list of the files you want to open, 2) provide the path of the directories containing the files and xnemogcm will try to open as many files as it can.

Arguments

  • nemo_files (list / generator or string / Path, optional) – 1) list / generator containing the nemo output files, or 2) string / Path of the directory containing the nemo output files. Will open all files containing "grid_X" in their name, "X" being "T", "U", "V", "W", "F", etc.

  • domcfg_files (list / generator or string / Path, optional) – 1) list / generator containing the domain_cfg / mesh_mask files, or 2) string / Path of the directory containing the domain_cfg / mesh_mask output files. Will open all files containing "domain_cfg" or "mesh_mask" in their name.

  • nemo_kwargs (dict) – dict containing the parameters of the xnemogcm.open_nemo function. Can contain the files and/or datadir arguments of the open_nemo function, e.g. {'chunks':{'time_counter':10}}

  • domcfg_kwargs (dict) – dict containing the parameters of the xnemogcm.open_domain_cfg function. Can contain the files and/or datadir arguments of the open_domain_cfg function.

  • linear_free_surface (bool) – True if linear free surface is used. Used by the xnemogcm._merge_nemo_and_domain_cfg function.

Source code in xnemogcm/merge.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def open_nemo_and_domain_cfg(
    nemo_files=None,
    domcfg_files=None,
    nemo_kwargs=None,
    domcfg_kwargs=None,
    linear_free_surface=False,
):
    """
    Open nemo_ds and domcfg with open_nemo and open_domain_cfg and merge them with _merge_nemo_and_domain_cfg.

    See the respective functions docstrings for more details.

    2 methods are available for nemo files and domain_cfg/mesh_mask files: 1) provide a list of the files
    you want to open, 2) provide the path of the directories containing the files and xnemogcm will try
    to open as many files as it can.

    Arguments
    ---------
    nemo_files : Optional, list / generator or string / Path
        1) list / generator containing the nemo output files, or
        2) string / Path of the directory containing the nemo output files.
           Will open all files containing "grid_X" in their name, "X" being "T", "U", "V", "W", "F", etc
    domcfg_files : Optional, list / generator or string / Path
        1) list / generator containing the domain_cfg / mesh_mask files, or
        2) string / Path of the directory containing the domain_cfg / mesh_mask output files.
           Will open all files containing "domain_cfg" or "mesh_mask" in their name.
    nemo_kwargs : dict
        dict containing the parameters of the xnemogcm.open_nemo function
        Can contain the files and/or datadir arguments of the open_nemo function
        e.g. {'chunks':{'time_counter':10}}
        The given dict is not modified.
    domcfg_kwargs : dict
        dict containing the parameters of the xnemogcm.open_domain_cfg function
        Can contain the files and/or datadir arguments of the open_domain_cfg function
        The given dict is not modified.
    linear_free_surface : bool
        True if linear free surface is used. Used by xnemogcm._merge_nemo_and_domain_cfg function

    Returns
    -------
    The value returned by _merge_nemo_and_domain_cfg(nemo_ds, domcfg, linear_free_surface)
    """
    # Work on shallow copies: keys are added below ('files', 'datadir', 'domcfg')
    # and the dicts given by the caller must not be modified as a side effect.
    # The None default also avoids mutable default arguments.
    nemo_kwargs = {} if nemo_kwargs is None else dict(nemo_kwargs)
    domcfg_kwargs = {} if domcfg_kwargs is None else dict(domcfg_kwargs)

    # A list / generator / map is taken as the files themselves,
    # a string / Path as the directory containing them
    if isinstance(domcfg_files, (list, types.GeneratorType, map)):
        domcfg_kwargs["files"] = domcfg_files
    elif isinstance(domcfg_files, (str, Path)):
        domcfg_kwargs["datadir"] = domcfg_files

    if isinstance(nemo_files, (list, types.GeneratorType, map)):
        nemo_kwargs["files"] = nemo_files
    elif isinstance(nemo_files, (str, Path)):
        nemo_kwargs["datadir"] = nemo_files

    # The domcfg must be opened first because open_nemo needs it
    domcfg = open_domain_cfg(**domcfg_kwargs)
    nemo_kwargs["domcfg"] = domcfg
    nemo_ds = open_nemo(**nemo_kwargs)
    return _merge_nemo_and_domain_cfg(nemo_ds, domcfg, linear_free_surface)

metrics

compute_missing_metrics(ds, all_scale_factors=all_scale_factors, time_varying=True)

Add all possible scale factors to the dataset.

For the moment, e3t (or e3t_0) at least needs to be present in the dataset for the time_varying=True (time_varying=False) case. If e3t_0 is not found (e.g. for nemo 3.6), it will raise a warning and use e3t_1d (this will lead to wrong results if terrain-following coordinates are used).

May have some boundary issues, and only non-periodic boundaries are implemented.

Will add the metrics to the given dataset. To avoid this, use a ds.copy()

Parameters:
  • ds (Dataset) –

    dataset containing the scale factors. Must be xgcm compatible (e.g. opened with xnemogcm)

  • all_scale_factors (list, default: all_scale_factors ) –

    list of the scale factors to compute (nothing is done for the scale factors already present in ds) Must be a sublist of: ['e3t', 'e3u', 'e3v', 'e3f', 'e3w', 'e3uw', 'e3vw', 'e3fw']

  • time_varying (bool, default: True ) –

    Whether to use the time varying scale factors (True) or the constant ones (False, 'e3x_0')

Returns:
  • the new dataset with the scale factors added
Source code in xnemogcm/metrics.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def compute_missing_metrics(ds, all_scale_factors=all_scale_factors, time_varying=True):
    """
    Add all possible scale factors to the dataset.

    For the moment, e3t (or e3t_0) at least needs to be present in the dataset
    for the time_varying=True (time_varying=False) case.
    If it is not found (e.g. e3t_0 for nemo 3.6), a warning is issued and e3t_1d is used instead
    (this will lead to wrong results if terrain-following coordinates are used).

    May have some boundary issues, and only non-periodic boundaries are implemented.

    Will add the metrics to the given dataset. To avoid this, use a ds.copy()

    Parameters
    ----------
    ds : xarray.Dataset
        dataset containing the scale factors. Must be xgcm compatible (e.g. opened with xnemogcm)
    all_scale_factors : list
        list of the scale factors to compute (nothing is done for the scale factors
        already present in *ds*)
        Must be a sublist of: ['e3t', 'e3u', 'e3v', 'e3f', 'e3w', 'e3uw', 'e3vw', 'e3fw']
    time_varying : bool
        Whether to use the time varying scale factors (True) or the constant ones (False, 'e3x_0')

    Returns
    -------
    the new dataset with the scale factors added

    Raises
    ------
    ModuleNotFoundError
        if xgcm is not installed
    ValueError
        if neither e3t (e3t_0 when time_varying=False) nor e3t_1d is in the dataset
    """
    try:
        import xgcm
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            "xgcm is not installed, you need xgcm for this function"
        ) from err
    from warnings import warn

    warn(
        "This function is in pre-phase. Do not expect a high precision, but a good estimate. Some boundary issues may arise."
    )

    grid = xgcm.Grid(ds, periodic=False)

    if time_varying:
        e3t = "e3t"
    else:
        e3t = "e3t_0"
    if e3t not in ds:
        if "e3t_1d" not in ds:
            raise (
                ValueError(
                    f"None of {e3t} or e3t_1d are found in the dataset, but it is mandatory to have at least one of them."
                )
            )
        warn(
            f"{e3t} scale factor not found in the dataset, we will use e3t_1d. This will lead to errors if you use terrain-following coordinates."
        )
        # Fall back on the 1D profile, broadcast along x_c and y_c
        ds[e3t] = ds["e3t_1d"].broadcast_like(ds["x_c"]).broadcast_like(ds["y_c"])

    if not time_varying:
        # Constant scale factors carry the '_0' suffix
        all_scale_factors = [i + "_0" for i in all_scale_factors]

    for i in all_scale_factors:
        if i not in ds.variables:
            # dep_graph is keyed by the name without the '_0' suffix
            if time_varying:
                vertex = dep_graph[i]
            else:
                vertex = dep_graph[i[:-2]]
            for e3 in vertex.keys():
                if time_varying:
                    e3_nme = e3
                else:
                    e3_nme = e3 + "_0"
                if e3_nme in ds.variables:
                    # we stop at the first one matching: without the break, later
                    # candidates would be interpolated too and overwrite this result
                    ds[i] = grid.interp(ds[e3_nme], vertex[e3], boundary="extend")
                    break
    return ds

get_metrics(ds)

Return a dict with the available metrics, to be used with xgcm.Grid

Parameters:
  • ds (Dataset) –

    domain_cfg or DataSet returned by xnemogcm._merge_nemo_and_domain_cfg or Dataset returned by xnemogcm.open_nemo_and_domain_cfg Should contain the outputted metrics, in a standard format 'e3x' with x an arakawa point in lower case

Returns:
  • metrics( dict ) –

    dict understood by xgcm.Grid, metrics argument

Source code in xnemogcm/metrics.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def get_metrics(ds):
    """
    Build the dict of the metrics available in *ds*, to be used with xgcm.Grid

    Parameters
    ----------
    ds : xarray.Dataset
        domain_cfg
        or DataSet returned by xnemogcm._merge_nemo_and_domain_cfg
        or Dataset returned by xnemogcm.open_nemo_and_domain_cfg
        Should contain the outputted metrics, in a standard format 'e3x'
        with x an arakawa point in lower case

    Returns
    -------
    metrics : dict
        dict understood by xgcm.Grid, metrics argument.
        The keys are ("X",), ("Y",) and ("Z",); each value is the list of the
        candidate scale factors found in *ds.variables* (possibly empty).
    """
    horizontal_points = ("t", "u", "v", "f")
    candidates = {
        # X distances
        ("X",): ["e1" + p for p in horizontal_points],
        # Y distances
        ("Y",): ["e2" + p for p in horizontal_points],
        # Z distances
        ("Z",): ["e3t", "e3u", "e3v", "e3f", "e3w", "e3uw", "e3vw", "e3fw"],
    }
    available = ds.variables
    return {
        axis: [name for name in names if name in available]
        for axis, names in candidates.items()
    }

namelist

open_namelist(datadir=None, files=None, ref=True, cfg=True)

Open the namelist and store it into a xarray.Dataset

Source code in xnemogcm/namelist.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def open_namelist(datadir=None, files=None, ref=True, cfg=True):
    """
    Open the namelist(s) and store them into a xarray.Dataset

    Parameters
    ----------
    datadir : directory in which files matching 'namelist*' are looked for
    files : the namelist files to open
    ref : bool
        Whether to load the file whose name contains 'ref'
    cfg : bool
        Whether to load the file whose name contains 'cfg'.
        It is loaded after 'ref', so its values replace the 'ref' ones.

    Returns
    -------
    xarray.Dataset with one variable per namelist entry; the name of the namelist
    group is stored in the 'namelist' attribute of each variable.

    Raises
    ------
    ValueError
        if more than 2 files are given / found
    """
    import os

    import f90nml

    files = _dir_or_files_to_files(datadir, files, patterns=["namelist*"])

    if len(files) > 2:
        raise ValueError(
            f"Too many files given for the namelists, please check. Got {files}"
        )

    ds = xr.Dataset()

    for load, name in [[ref, "ref"], [cfg, "cfg"]]:
        if not load:
            continue
        # Look for 'ref' / 'cfg' in the file name first: matching on the whole path
        # selects every file as soon as a parent directory contains the tag
        # (e.g. '/data/cfg_runs/namelist_ref'), and the namelist is then not read.
        matching = [i for i in files if name in os.path.basename(str(i))]
        if not matching:
            # Previous behaviour, kept for files only identified by their directory
            matching = [i for i in files if name in str(i)]
        try:
            # NOTE(review): presumably a number of matching files different from 1
            # ends up in the TypeError handled below - confirm against f90nml.read
            namelist = f90nml.read(*matching)
            for nam_key in namelist.keys():
                for i in namelist[nam_key]:
                    ds[i] = namelist[nam_key][i]
                    ds[i].attrs["namelist"] = nam_key
        except (FileNotFoundError, TypeError):
            _warn_namelist_not_found(name)

    return ds

nemo

nemo_preprocess(ds, domcfg, point_type=None)

Preprocess function for the nemo files.

This function renames the time dimension 'time_counter' into 't', 'time_counter_bounds' into 't_bounds'. It removes the old 'nav_lat' and 'nav_lon' variables and sets the 'x', 'y', and 'z' dimensions into the correct dimension, depending on the grid point (e.g. ['x_c', 'y_c', 'z_c'] for T point).

Parameters:
  • ds (Dataset) –

    a dataset containing raw NEMO output data (e.g. opened from a netcdf file as 'BASIN_grid_T.nc' or opened from any other backend, zarr, etc), with the old names for the variables and dimensions (e.g. 'time_counter')

  • domcfg (Dataset) –

    a dataset containing the domcfg data

  • point_type

    The point type. If None, will be inferred from either filename or attribute

Returns:
  • xarray.Dataset containing the new dimension names, the correct grid point and attributes.
Source code in xnemogcm/nemo.py
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def nemo_preprocess(ds, domcfg, point_type=None):
    """
    Preprocess function for the nemo files.

    This function renames the time dimension 'time_counter' into 't', 'time_counter_bounds' into 't_bounds'.
    It removes the old 'nav_lat' and 'nav_lon' variables and sets the 'x', 'y', and 'z' dimensions
    into the correct dimension, depending on the grid point (e.g. ['x_c', 'y_c', 'z_c'] for T point).

    Parameters
    ----------
    ds : xarray.Dataset
        a dataset containing raw NEMO output data (e.g. opened from a netcdf file as 'BASIN_grid_T.nc'
        or opened from any other backend, zarr, etc),
        with the old names for the variables and dimensions (e.g. 'time_counter')
    domcfg : xarray.Dataset
        a dataset containing the domcfg data
    point_type: None or str in ['T', 'U', 'V', 'F', 'W', 'UW', 'VW', 'FW']
        The point type. If None, will be inferred from either filename or attribute

    Returns
    -------
    xarray.Dataset containing the new dimension names, the correct grid point and attributes.
    """
    to_rename = {}
    if point_type is None:
        point_type = _get_point_type(
            filename=ds.encoding.get("source", ""),
            description=ds.attrs.get("description", ""),
        )

    point = akp.Point(point_type)

    # the depth variable name can be either deptht, depthu, etc
    # or grid_T_3D_inner, etc
    all_z_nme = [i for i in ds.dims if _is_depth_dim(i, ds[i].attrs)]
    if len(all_z_nme) >= 1:
        z_nme = all_z_nme[0]
        # Collapse all the depth dimensions into a single one (named after the first),
        # going through a temporary name
        ds = ds.swap_dims({i: "depth_tmp_xnemogcm" for i in all_z_nme}).swap_dims(
            {"depth_tmp_xnemogcm": z_nme}
        )
    else:
        # This means that there is no depth dependence of the data (surface data)
        z_nme = None

    # get the name of the dimension along i e.g. x, x_grid_U, x_grid_U_inner etc
    x_nme = [i for i in ds.dims if "x_grid" in i or i == "x"]
    # get the name of the dimension along j e.g. y, y_grid_U, y_grid_U_inner etc
    y_nme = [i for i in ds.dims if "y_grid" in i or i == "y"]

    for x in x_nme:
        to_rename.update({x: point.x})

    for y in y_nme:
        to_rename.update({y: point.y})

    points = [point.x, point.y]
    if z_nme:
        to_rename.update({z_nme: point.z})
        points += [point.z]

    ds = ds.drop_vars(
        ["nav_lat", "nav_lon"],
        errors="ignore",
    )
    # rename time and space
    # get time_counter bounds
    time_b = ds["time_counter"].attrs.get("bounds")
    if time_b and time_b in ds:
        to_rename.update({"time_counter": "t", time_b: "t_bounds"})
    else:
        to_rename.update({"time_counter": "t"})
        if time_b not in ds:
            # The 'bounds' attribute is dropped when it does not point to a variable of ds.
            # It can also be absent altogether (time_b is None), hence the default
            # value: a bare pop("bounds") would raise a KeyError in that case.
            ds["time_counter"].attrs.pop("bounds", None)
            time_b = None
    ds = ds.rename(to_rename)
    if time_b and "t_bounds" in ds:
        ds["t"].attrs["bounds"] = "t_bounds"
    # setting z_c/z_f/x_c/etc to be the same as in domcfg
    ds = ds.assign_coords({i: domcfg[i] for i in points})
    # Assign the proper coordinates
    # 1st case: horizontal
    if z_nme:
        p = set(points[:2])
    else:
        p = set(points)
    # Only the domcfg coordinates defined on exactly these dimensions are copied
    coords = [i for i in domcfg.coords if set(domcfg.coords[i].dims) == p]
    ds = ds.assign_coords({i: domcfg[i] for i in coords})
    # 2nd case vertical
    if z_nme:
        p = set(points)
        coords = [i for i in domcfg.coords if set(domcfg.coords[i].dims) == p]
        ds = ds.assign_coords({i: domcfg[i] for i in coords})
    return ds

open_nemo(domcfg, datadir=None, files=None, chunks=None, parallel=False, **kwargs_open)

Open the nemo dataset and rename the coordinates to conform to xgcm.Grid

The filenames must finish with 'grid_X.nc', with X in ['T', 'U', 'V', 'W', 'UW', 'VW', 'FW'] OR the global attribute 'description' of each individual file must be 'ocean X grid variables' with X in ['T', 'U', ...]

Parameters:
  • datadir (string or Path, default: None ) –

    The directory containing the nemo files

  • domcfg (Dataset) –

    the domcfg dataset, e.g. opened with xnemogcm.open_domain_cfg

  • files (list, default: None ) –

    List of the files to open

  • chunks (dict, default: None ) –

    The chunks to use when opening the files, e.g. chunks={'time_counter':10}. Warning: chunks need to be provided with the old names of the dimensions, i.e. 'time_counter', 'x', etc. For more complex chunking, you may want to open without any chunks and set them up afterward.

  • kwargs_open (any other argument given to the xarray.open_dataset function, default: {} ) –
Returns:
  • nemo_ds( Dataset ) –

    Dataset containing all outputted variables, set on the proper grid points (center, face, etc).

Source code in xnemogcm/nemo.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def open_nemo(
    domcfg, datadir=None, files=None, chunks=None, parallel=False, **kwargs_open
):
    """
    Open the nemo dataset and rename the coordinates to conform to xgcm.Grid

    The filenames must finish with 'grid_X.nc', with X in
    ['T', 'U', 'V', 'W', 'UW', 'VW', 'FW']
    *OR*
    the global attribute 'description' of each individual file must
    be 'ocean X grid variables' with X in ['T', 'U', ...]

    Parameters
    ----------
    domcfg : xarray.Dataset
        the domcfg dataset, e.g. opened with xnemogcm.open_domain_cfg
    datadir : string or pathlib.Path
        The directory containing the nemo files
    files : list, optional
        List of the files to open
    chunks : dict
        The chunks to use when opening the files,
        e.g. chunks={'time_counter':10}
        Warning: chunks need to be provided with the old names of dimensions
        i.e. 'time_counter', 'x', etc
        For more complex chunking, you may want to open without any chunks and set them up afterward.
    parallel : bool
        If True, the opening of the files and the detection of the point type
        are wrapped with dask.delayed
    kwargs_open : any other argument given to the xarray.open_dataset function

    Returns
    -------
    nemo_ds : xarray.Dataset
        Dataset containing all outputted variables, set on the proper
        grid points (center, face, etc).

    Raises
    ------
    FileNotFoundError
        if no output file is found / provided
    """
    files = _dir_or_files_to_files(datadir, files, patterns=["*grid_*.nc"])
    if not files:
        raise FileNotFoundError("No output files are provided")

    opener = xr.open_dataset
    point_type_of = _get_point_type
    if parallel:
        from dask import delayed

        opener = delayed(opener)
        point_type_of = delayed(point_type_of)

    datasets = [opener(f, chunks=chunks or {}, **kwargs_open) for f in files]

    # Pair every dataset with its point type (from the file name or the description)
    positions = []
    for ds, f in zip(datasets, files):
        kind = point_type_of(
            filename=str(f), description=ds.attrs.get("description", "")
        )
        positions.append((ds, kind))

    # Follow xarray's handling of open_mfdatasets
    try:
        return process_nemo(positions=positions, domcfg=domcfg, parallel=parallel)
    except ValueError:
        for ds in datasets:
            ds.close()
        raise

process_nemo(positions, domcfg, parallel=False)

Process datasets from NEMO outputs and set coordinates and attributes.

Parameters:
  • positions (list of tuples) –

    [(ds1, 'X'), (ds2, 'Y'), (ds3, 'Z'), etc] Here 'X', 'Y', 'Z' must be the proper positions, e.g. in ['T', 'U', 'V', 'W', 'UW', 'VW', 'FW'], OR can be set to None. If None, then the corresponding dataset(s) must have the global attribute 'description' with value 'ocean X grid variables' with X in ['T', 'U', ...]

  • domcfg (Dataset) –

    the domcfg dataset

  • parallel (bool, default: False ) –

    whether to use dask.delayed to process tasks in parallel

Returns:
  • nemo_ds( Dataset ) –

    Dataset containing all outputted variables, set on the proper grid points (center, face, etc).

Source code in xnemogcm/nemo.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def process_nemo(positions, domcfg, parallel=False):
    """
    Preprocess the datasets of the NEMO outputs, combine them, and set the attributes.

    Parameters
    ----------
    positions : list of tuples
        [(ds1, 'X'), (ds2, 'Y'), (ds3, 'Z'), etc]
        Here 'X', 'Y', 'Z' must be the proper positions
        e.g. in ['T', 'U', 'V', 'W', 'UW', 'VW', 'FW']
        *OR*
        can be set to None. If None, then the corresponding dataset(s)
        must have the global attribute 'description' with value
        'ocean X grid variables' with X in ['T', 'U', ...]
    domcfg : xarray.Dataset
        the domcfg dataset
    parallel : bool, default False
        whether to use dask.delayed to process tasks in parallel

    Returns
    -------
    nemo_ds : xarray.Dataset
        Dataset containing all outputted variables, set on the proper
        grid points (center, face, etc).
    """
    preprocess = nemo_preprocess
    if parallel:
        import dask

        # wrap preprocess with delayed
        preprocess = dask.delayed(nemo_preprocess)

    # Don't use kwargs inside preprocess, otherwise arguments are swapped, bug(?) in dask versions after 2024
    datasets = []
    for ds, X in positions:
        position = _check_position(ds, X, parallel)
        datasets.append(preprocess(ds, domcfg, position))

    if parallel:
        # netcdf4 is not thread safe
        # https://github.com/pydata/xarray/issues/7079#issuecomment-1267477522
        with dask.config.set(scheduler="single-threaded"):
            (datasets,) = dask.compute(datasets)

    nemo_ds = xr.combine_by_coords(datasets, combine_attrs="drop_conflicts")
    # adding attributes
    nemo_ds.attrs.update(
        {
            "name": "NEMO dataset",
            "description": "Ocean grid variables, set on the proper positions",
            "title": "Ocean grid variables",
        }
    )
    return nemo_ds

tools

get_domcfg_points()

The points are hard-coded by hand so that no errors are introduced by inferring them from the variable names

Source code in xnemogcm/tools.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def get_domcfg_points():
    """
    Return the table {variable name: point type} of the domcfg / mesh_mask variables.

    The points are hard-coded by hand so that no errors are introduced by
    inferring them from the variable names. The value is None for the variables
    that have no point type. A new dict is built at each call.
    """
    return {
        # longitude / latitude
        "nav_lon": "T",
        "nav_lat": "T",
        # variables without a point type
        "jpiglo": None,
        "jpjglo": None,
        "jpkglo": None,
        "jperio": None,
        "ln_zco": None,
        "ln_zps": None,
        "ln_sco": None,
        "ln_isfcav": None,
        # glam* / gphi*
        "glamt": "T",
        "glamu": "U",
        "glamv": "V",
        "glamf": "F",
        "gphit": "T",
        "gphiu": "U",
        "gphiv": "V",
        "gphif": "F",
        # horizontal scale factors
        "e1t": "T",
        "e1u": "U",
        "e1v": "V",
        "e1f": "F",
        "e2t": "T",
        "e2u": "U",
        "e2v": "V",
        "e2f": "F",
        # ff*
        "ff": "F",  # nemo 3.6
        "ff_f": "F",
        "ff_t": "T",
        # vertical scale factors
        "e3t_1d": "T",
        "e3w_1d": "W",
        "e3t_0": "T",
        "e3u_0": "U",
        "e3v_0": "V",
        "e3f_0": "F",
        "e3w_0": "W",
        "e3uw_0": "UW",
        "e3vw_0": "VW",
        # levels, depths and heights
        "top_level": "T",
        "bottom_level": "T",
        "stiffness": "T",
        "gdept_0": "T",
        "gdepw_0": "W",
        "gdepu": "U",
        "gdepv": "V",
        "ht_0": "T",
        "hu_0": "U",
        "hv_0": "V",
        # masks
        "tmask": "T",
        "umask": "U",
        "vmask": "V",
        "fmask": "F",
        "tmaskutil": "T",
        "umaskutil": "U",
        "vmaskutil": "V",
        "fmaskutil": "F",
        # 1D depths and remaining T-point variables
        "gdept_1d": "T",
        "gdepw_1d": "W",
        "mbathy": "T",
        "misf": "T",
        "isfdraft": "T",
    }