# Source code for pmotools.pmo_builder.panel_information_to_pmo

#!/usr/bin/env python3
import copy
import json

import numpy as np
import pandas as pd
import warnings

from .json_convert_utils import remove_optional_null_values
from ..pmo_builder.json_convert_utils import check_additional_columns_exist


[docs]class PMOPanelBuilder:
    """
    Build PMO ``target_info`` and ``panel_info`` structures from a target table.

    Wraps a dataframe of one-row-per-target panel data and converts it into the
    nested dictionaries a PMO expects. Most users should call
    :func:`panel_info_table_to_pmo` instead of using this class directly.

    :param target_table: dataframe with one row per target
    :param panel_name: name assigned to the panel
    :param target_name_col: column holding the target names. Default: ``target_name``
    :param forward_primers_seq_col: column holding the forward primer sequence. Default: ``fwd_primer``
    :param reverse_primers_seq_col: column holding the reverse primer sequence. Default: ``rev_primer``
    :param reaction_name_col: optional column naming which reaction each target
        belongs to; if omitted, all targets go in a single reaction
    :param reaction_name_col_delimiter: delimiter splitting the reaction column
        into multiple reactions. Default: ``,``
    :param forward_primers_start_col: optional column with the 0-based forward primer start
    :param forward_primers_end_col: optional column with the 0-based forward primer end
    :param reverse_primers_start_col: optional column with the 0-based reverse primer start
    :param reverse_primers_end_col: optional column with the 0-based reverse primer end
    :param insert_start_col: optional column with the 0-based insert start
    :param insert_end_col: optional column with the 0-based insert end
    :param chrom_col: optional chromosome column; required if any location columns are set
    :param strand_col: optional strand column
    :param ref_seq_col: optional reference-sequence column for the insert
    :param gene_name_col: optional gene-name column
    :param target_attributes_col: optional column of target attribute classifications
    :param target_attributes_col_delimiter: delimiter splitting the attributes
        column into multiple attributes. Default: ``,``
    :param additional_target_info_cols: optional list of extra column names to
        copy verbatim into each target dict
    """

    def __init__(
        self,
        target_table: pd.DataFrame,
        panel_name: str,
        target_name_col: str = "target_name",
        forward_primers_seq_col: str = "fwd_primer",
        reverse_primers_seq_col: str = "rev_primer",
        reaction_name_col: str | None = None,
        reaction_name_col_delimiter: str = ",",
        forward_primers_start_col: int | None = None,
        forward_primers_end_col: int | None = None,
        reverse_primers_start_col: int | None = None,
        reverse_primers_end_col: int | None = None,
        insert_start_col: int | None = None,
        insert_end_col: int | None = None,
        chrom_col: str | None = None,
        strand_col: str | None = None,
        ref_seq_col: str | None = None,
        gene_name_col: str | None = None,
        target_attributes_col: str | None = None,
        target_attributes_col_delimiter: str = ",",
        additional_target_info_cols: list | None = None,
    ):
        self.target_table = target_table
        self.panel_name = panel_name
        self.target_name_col = target_name_col
        self.forward_primers_seq_col = forward_primers_seq_col
        self.reverse_primers_seq_col = reverse_primers_seq_col
        self.reaction_name_col = reaction_name_col
        self.reaction_name_col_delimiter = reaction_name_col_delimiter
        self.forward_primers_start_col = forward_primers_start_col
        self.forward_primers_end_col = forward_primers_end_col
        self.reverse_primers_start_col = reverse_primers_start_col
        self.reverse_primers_end_col = reverse_primers_end_col
        self.insert_start_col = insert_start_col
        self.insert_end_col = insert_end_col
        self.chrom_col = chrom_col
        self.strand_col = strand_col
        self.ref_seq_col = ref_seq_col
        self.gene_name_col = gene_name_col
        self.target_attributes_col = target_attributes_col
        self.target_attributes_col_delimiter = target_attributes_col_delimiter
        self.additional_target_info_cols = additional_target_info_cols

        self.location_info_cols = self.check_location_columns()

[docs]    def check_location_columns(self):
        """
        Validate the optional genomic-location column configuration.

        If any location column is set, enforces that ``chrom_col`` is present and
        that primer/insert start and end columns are supplied as pairs.

        :raises ValueError: if location columns are set inconsistently
        :return: the list of location columns if any were provided, otherwise None
        """
        location_cols = [
            self.forward_primers_start_col,
            self.forward_primers_end_col,
            self.reverse_primers_start_col,
            self.reverse_primers_end_col,
            self.insert_start_col,
            self.insert_end_col,
            self.chrom_col,
            self.strand_col,
            self.ref_seq_col,
        ]
        if any(location_cols):
            collect_warnings = []
            if not self.chrom_col:
                collect_warnings.append(
                    "If including location information (any of forward_primers_start_col, forward_primers_end_col, reverse_primers_start_col, reverse_primers_end_col, insert_start_col, insert_end_col) chrom_col must be set."
                )
            if (self.forward_primers_start_col is None) != (
                self.forward_primers_end_col is None
            ):
                collect_warnings.append(
                    "If one of forward_primers_start_col or forward_primers_end_col is set, then both must be."
                )
            if (self.reverse_primers_start_col is None) != (
                self.reverse_primers_end_col is None
            ):
                collect_warnings.append(
                    "If one of reverse_primers_start_col or reverse_primers_end_col is set, then both must be."
                )
            if (self.insert_start_col is None) != (self.insert_end_col is None):
                collect_warnings.append(
                    "If one of insert_start_col or insert_end_col is set, then both must be."
                )
            if collect_warnings:
                raise ValueError(
                    "Errors with location column configuration:\n- "
                    + "\n- ".join(collect_warnings)
                )
            return location_cols
        return None

[docs]    def check_target_names_are_unique(self):
        """
        Raise an exception if the target names are not unique

        :return: Nothing
        """
        duplications = self.target_table[
            self.target_table[self.target_name_col].duplicated(keep=False)
        ]
        if not duplications.empty:
            raise ValueError(
                f"The following target_ids are duplicated: {duplications[self.target_name_col].unique()}"
            )

[docs]    def check_unique_target_info(self, columns_to_check):
        """
        Raise an exception if the target info is not unique

        :param columns_to_check: the columns to check to ensure the target info is unique
        :return: Nothing
        """
        groups = (
            self.target_table.groupby(columns_to_check)[self.target_name_col]
            .apply(list)
            .reset_index(name=self.target_name_col)
        )

        # Keep only groups where more than one target shares the same primer pair
        duplicated_groups = groups[groups[self.target_name_col].str.len() > 1]

        if not duplicated_groups.empty:
            msg_lines = ["The following targets have duplicated information:"]
            for _, row in duplicated_groups.iterrows():
                cols_info = ", ".join(f"{col}={row[col]}" for col in columns_to_check)
                targets = ", ".join(map(str, row[self.target_name_col]))
                msg_lines.append(f"targets: {targets} → {cols_info}")

            raise ValueError("\n".join(msg_lines))

[docs]    def summarise_targets_missing_optional_info(self):
        """
        Warn about targets missing optional location fields.

        For each of insert, forward-primer, and reverse-primer locations that was
        requested, finds targets with empty coordinate fields and emits a warning.
        Targets listed here are skipped when their location block is built.

        :return: a tuple ``(missing_insert_loc, missing_fwd_primer_loc,
            missing_rev_primer_loc)``; each element is a list of target names, or
            None if that location type was not requested
        """
        missing_insert_loc = None
        missing_fwd_primer_loc = None
        missing_rev_primer_loc = None

        def check_missing(name, cols):
            missing = self.target_table[self.target_table[cols].isnull().any(axis=1)][
                self.target_name_col
            ].tolist()
            if len(missing) > 0:
                warnings.warn(
                    f"{name} location information was not added for the following targets that had empty fields: {', '.join(missing)}"
                )
            return missing

        missing_insert_loc = (
            check_missing(
                "Insert", [self.chrom_col, self.insert_start_col, self.insert_end_col]
            )
            if self.insert_start_col
            else None
        )
        missing_fwd_primer_loc = (
            check_missing(
                "Forward primer",
                [
                    self.chrom_col,
                    self.forward_primers_start_col,
                    self.forward_primers_end_col,
                ],
            )
            if self.forward_primers_start_col
            else None
        )
        missing_rev_primer_loc = (
            check_missing(
                "Reverse primer",
                [
                    self.chrom_col,
                    self.reverse_primers_start_col,
                    self.reverse_primers_end_col,
                ],
            )
            if self.reverse_primers_start_col
            else None
        )
        return missing_insert_loc, missing_fwd_primer_loc, missing_rev_primer_loc

[docs]    def build_target_info_dict(
        self,
        genome_id_col: str | None = None,
    ):
        """
        Build the list of target_info dictionaries from the target table.

        Validates target-name uniqueness and primer/location uniqueness, then
        assembles one dict per target including primer sequences and, where
        available, insert and primer genomic locations.

        :param genome_id_col: optional column holding the genome id for each
            target; if omitted, a genome_id of 0 is used
        :return: a list of target_info dictionaries
        """
        # Check targets before putting into JSON
        (
            forward_primers_start_col,
            forward_primers_end_col,
            reverse_primers_start_col,
            reverse_primers_end_col,
            insert_start_col,
            insert_end_col,
            chrom_col,
            strand_col,
            ref_seq_col,
        ) = self.location_info_cols if self.location_info_cols else [None] * 9

        # Check target information in the dataframe
        self.check_target_names_are_unique()
        columns_to_check = [self.forward_primers_seq_col, self.reverse_primers_seq_col]
        if self.location_info_cols:
            columns_to_check += [col for col in self.location_info_cols if col]
        self.check_unique_target_info(list(set(columns_to_check)))
        (
            missing_insert_loc,
            missing_fwd_primer_loc,
            missing_rev_primer_loc,
        ) = self.summarise_targets_missing_optional_info()

        # Put targets together in a dictionary
        targets_dicts = []
        for _, row in self.target_table.iterrows():
            target_name = row[self.target_name_col]
            target_dict = {
                "target_name": target_name,
            }
            if self.gene_name_col:
                target_dict["gene_name"] = row[self.gene_name_col]
            if self.target_attributes_col:
                target_dict["target_attributes"] = row[
                    self.target_attributes_col
                ].split(self.target_attributes_col_delimiter)
            if self.additional_target_info_cols:
                for col in self.additional_target_info_cols:
                    value = row[col]
                    # Convert numpy types to native Python types
                    if isinstance(value, (np.integer, np.int64)):
                        value = int(value)
                    elif isinstance(value, (np.floating, np.float64)):
                        value = float(value)
                    elif pd.isna(value):
                        value = None
                    target_dict[col] = value

            # Add insert location info if location_info_cols are provided
            if insert_start_col and target_name not in missing_insert_loc:
                if genome_id_col:
                    genome_id = int(row[genome_id_col])
                else:
                    genome_id = 0
                target_dict["insert_location"] = {
                    "genome_id": genome_id,
                    "chrom": row[chrom_col],
                    "start": int(row[insert_start_col]),
                    "end": int(row[insert_end_col]),
                }
                if strand_col and pd.notna(row[strand_col]):
                    target_dict["insert_location"]["strand"] = row[strand_col]
                if ref_seq_col and pd.notna(row[ref_seq_col]):
                    target_dict["insert_location"]["ref_seq"] = row[ref_seq_col]

            # Extract primer information for each row
            fwd_primer_dict = {"seq": row[self.forward_primers_seq_col]}
            rev_primer_dict = {"seq": row[self.reverse_primers_seq_col]}
            if forward_primers_start_col and target_name not in missing_fwd_primer_loc:
                if genome_id_col:
                    genome_id = int(row[genome_id_col])
                else:
                    genome_id = 0
                fwd_primer_dict["location"] = {
                    "genome_id": genome_id,
                    "chrom": row[chrom_col],
                    "end": int(row[forward_primers_start_col]),
                    "start": int(row[forward_primers_end_col]),
                }
                if strand_col and pd.notna(row[strand_col]):
                    fwd_primer_dict["location"]["strand"] = row[strand_col]
            if reverse_primers_start_col and target_name not in missing_rev_primer_loc:
                if genome_id_col:
                    genome_id = int(row[genome_id_col])
                else:
                    genome_id = 0
                rev_primer_dict["location"] = {
                    "genome_id": genome_id,
                    "chrom": row[chrom_col],
                    "end": int(row[reverse_primers_end_col]),
                    "start": int(row[reverse_primers_start_col]),
                }
                if strand_col and pd.notna(row[strand_col]):
                    rev_primer_dict["location"]["strand"] = row[strand_col]
            target_dict["forward_primer"] = fwd_primer_dict
            target_dict["reverse_primer"] = rev_primer_dict

            targets_dicts.append(target_dict)

        return targets_dicts

[docs]    def build_panel_info_dict(self, targets_dict):
        """
        Build the panel_info dictionary, grouping targets into reactions.

        If no reaction column was configured, all targets are placed in a single
        reaction named ``full``.

        :param targets_dict: the target_info list from :meth:`build_target_info_dict`
        :return: a panel_info dictionary with ``panel_name`` and ``reactions``,
            where each reaction lists target indices into ``targets_dict``
        """
        panel_dict = {"panel_name": self.panel_name, "reactions": []}
        target_indices = dict()
        for i, target_dict in enumerate(targets_dict):
            target_indices[target_dict["target_name"]] = i

        if self.reaction_name_col:
            reactions = (
                self.target_table[self.reaction_name_col]
                .str.split(self.reaction_name_col_delimiter)
                .explode()
                .str.strip()  # Remove leading/trailing whitespace
                .unique()
            )
        else:
            reactions = ["full"]
            self.target_table["reaction"] = "full"
            self.reaction_name_col = "reaction"

        for reaction in reactions:
            # Filter rows where the reaction column contains this reaction
            matching_rows = self.target_table[
                self.target_table[self.reaction_name_col]
                .str.split(self.reaction_name_col_delimiter)
                .apply(
                    lambda x: reaction in [item.strip() for item in x]
                    if isinstance(x, list)
                    else False
                )
            ]
            # Get the indices for these targets
            target_indices_for_reaction = [
                target_indices[target_name]
                for target_name in matching_rows[self.target_name_col]
            ]
            panel_dict["reactions"].append(
                {
                    "reaction_name": reaction,
                    "panel_targets": target_indices_for_reaction,
                }
            )
        return panel_dict


def check_genome_info(genome_info):
    """
    Validate that genome info contains the required keys.

    Accepts either a single genome dict or a list of them, and checks each for
    the keys ``name``, ``genome_version``, ``taxon_id``, and ``url``. Missing
    keys are reported in sorted order so the error message is deterministic.

    :param genome_info: a genome dict or list of genome dicts
    :raises TypeError: if genome_info is not a dict or list, or a list element is not a dict
    :raises ValueError: if the list is empty or any entry is missing required keys
    :return: Nothing
    """
    required_keys = {"name", "genome_version", "taxon_id", "url"}

    def _check_entry(entry, label):
        # sorted(): set iteration order varies between runs for string keys
        missing_keys = sorted(required_keys - entry.keys())
        if missing_keys:
            raise ValueError(
                f"{label} missing required keys: {', '.join(missing_keys)}"
            )

    if isinstance(genome_info, dict):
        _check_entry(genome_info, "genome_info")
    elif isinstance(genome_info, list):
        if not genome_info:
            raise ValueError("genome_info list cannot be empty")
        for i, genome_dict in enumerate(genome_info):
            if not isinstance(genome_dict, dict):
                raise TypeError(
                    f"genome_info[{i}] must be a dict, but got {type(genome_dict).__name__}"
                )
            _check_entry(genome_dict, f"genome_info[{i}]")
    else:
        raise TypeError(
            f"genome_info must be a dict or list, but got {type(genome_info).__name__}"
        )


def merge_panel_info_dicts(panel_info_dicts: list[dict]) -> dict:
    """
    Merge multiple panel_info dictionaries produced by panel_info_table_to_pmo.

    Target lists are concatenated (deduplicated by target_name; the first
    occurrence of a name wins) and all genome references are collapsed so that
    genome identifiers remain valid across the merged structure. Targets and
    genomes are deep-copied, so the inputs are never modified.

    :param panel_info_dicts: a list of panel_info dicts, each with ``target_info``
        and ``panel_info`` (and optionally ``targeted_genomes``)
    :raises ValueError: if the list is empty, a dict lacks ``target_info``, a
        target lacks ``target_name``, or a target has location data without
        accompanying ``targeted_genomes``
    :return: a merged dict with ``panel_info`` and ``target_info`` keys, plus
        ``targeted_genomes`` if any genomes were present
    """
    if not panel_info_dicts:
        raise ValueError("panel_info_dicts must contain at least one entry.")

    merged_targets: list[dict] = []
    target_name_to_index: dict[str, int] = {}
    merged_panels: list[dict] = []

    merged_genomes: list[dict] = []
    genome_signature_to_index: dict[str, int] = {}

    def canonicalise_genome(genome: dict) -> str:
        # Key-order-independent signature used to detect identical genomes.
        return json.dumps(genome, sort_keys=True)

    def iter_locations(target_entry: dict):
        # Yield (label, location dict) for every location block on a target.
        insert_loc = target_entry.get("insert_location")
        if insert_loc:
            yield "insert_location", insert_loc
        for primer_key, label in (
            ("forward_primer", "forward primer location"),
            ("reverse_primer", "reverse primer location"),
        ):
            primer = target_entry.get(primer_key)
            if isinstance(primer, dict) and primer.get("location"):
                yield label, primer["location"]

    def remap_genome_ids(target_entry: dict, mapping: dict[int, int]) -> None:
        # Rewrite per-panel genome ids to indices into merged_genomes; ids
        # absent from the mapping are left untouched.
        for _, location in iter_locations(target_entry):
            old_id = location.get("genome_id")
            if "genome_id" in location and old_id in mapping:
                location["genome_id"] = mapping[old_id]

    for panel_dict in panel_info_dicts:
        has_genomes = "targeted_genomes" in panel_dict
        genome_mapping: dict[int, int] = {}
        if has_genomes:
            for idx, genome in enumerate(panel_dict["targeted_genomes"]):
                signature = canonicalise_genome(genome)
                if signature not in genome_signature_to_index:
                    genome_signature_to_index[signature] = len(merged_genomes)
                    # deep copy so the merged result does not alias the input
                    merged_genomes.append(copy.deepcopy(genome))
                genome_mapping[idx] = genome_signature_to_index[signature]

        if "target_info" not in panel_dict:
            raise ValueError("panel_info_dict missing 'target_info'.")

        for target in panel_dict["target_info"]:
            target_name = target.get("target_name")
            if target_name is None:
                raise ValueError("Each target_info entry must include a 'target_name'.")

            if target_name in target_name_to_index:
                continue

            target_copy = copy.deepcopy(target)
            if has_genomes:
                remap_genome_ids(target_copy, genome_mapping)
            else:
                # Location data is meaningless without the genomes it refers to.
                for label, _ in iter_locations(target_copy):
                    raise ValueError(
                        f"target {target_name} has {label} but no targeted_genomes information is included"
                    )
            target_name_to_index[target_name] = len(merged_targets)
            merged_targets.append(target_copy)

        for panel in panel_dict.get("panel_info", []):
            remapped_panel = {"panel_name": panel["panel_name"], "reactions": []}
            for reaction in panel.get("reactions", []):
                # Translate panel-local target indices to merged indices by name.
                remapped_targets = [
                    target_name_to_index[
                        panel_dict["target_info"][target_idx]["target_name"]
                    ]
                    for target_idx in reaction["panel_targets"]
                ]
                remapped_panel["reactions"].append(
                    {
                        "reaction_name": reaction["reaction_name"],
                        "panel_targets": remapped_targets,
                    }
                )
            merged_panels.append(remapped_panel)

    ret = {"panel_info": merged_panels, "target_info": merged_targets}
    if merged_genomes:
        ret["targeted_genomes"] = merged_genomes
    return ret


def panel_info_table_to_pmo(
    target_table: pd.DataFrame,
    panel_name: str,
    genome_info: dict | list | None = None,
    target_name_col: str = "target_name",
    forward_primers_seq_col: str = "fwd_primer",
    reverse_primers_seq_col: str = "rev_primer",
    reaction_name_col: str | None = None,
    reaction_name_col_delimiter: str = ",",
    forward_primers_start_col: str | None = None,
    forward_primers_end_col: str | None = None,
    reverse_primers_start_col: str | None = None,
    reverse_primers_end_col: str | None = None,
    insert_start_col: str | None = None,
    insert_end_col: str | None = None,
    chrom_col: str | None = None,
    strand_col: str | None = None,
    ref_seq_col: str | None = None,
    gene_name_col: str | None = None,
    genome_id_col: str | None = None,
    target_attributes_col: str | None = None,
    target_attributes_col_delimiter: str = ",",
    additional_target_info_cols: list | None = None,
):
    """
    Convert a dataframe containing panel information into dictionary of targets and reference information

    :param target_table: the dataframe containing the target information
    :type target_table: pd.DataFrame
    :param panel_name: the panel ID assigned to the panel
    :type panel_name: str
    :param genome_info: reference genome information, needed if the target info contains genome location
    :type genome_info: dict or list, optional
    :param target_name_col: the name of the column containing the target IDs. Default: target_name
    :type target_name_col: str
    :param forward_primers_seq_col: the name of the column containing the sequence of the forward primer. Default: fwd_primer
    :type forward_primers_seq_col: str
    :param reverse_primers_seq_col: the name of the column containing the sequence of the reverse primer. Default: rev_primer
    :type reverse_primers_seq_col: str
    :param reaction_name_col: the name of the column containing which reaction the target was part of. By default they will all be put in one reaction.
    :type reaction_name_col: str, optional
    :param reaction_name_col_delimiter: the delimiter used to split the reaction name column into multiple reactions. Default is a comma.
    :type reaction_name_col_delimiter: str
    :param forward_primers_start_col: the name of the column containing the 0-based start coordinate of the forward primer
    :type forward_primers_start_col: str, optional
    :param forward_primers_end_col: the name of the column containing the 0-based end coordinate of the forward primer
    :type forward_primers_end_col: str, optional
    :param reverse_primers_start_col: the name of the column containing the 0-based start coordinate of the reverse primer
    :type reverse_primers_start_col: str, optional
    :param reverse_primers_end_col: the name of the column containing the 0-based end coordinate of the reverse primer
    :type reverse_primers_end_col: str, optional
    :param insert_start_col: the name of the column containing the 0-based start coordinate of the insert
    :type insert_start_col: str, optional
    :param insert_end_col: the name of the column containing the 0-based end coordinate of the insert
    :type insert_end_col: str, optional
    :param chrom_col: the name of the column containing the chromosome for the target
    :type chrom_col: str, optional
    :param gene_name_col: the name of the column containing the gene id
    :type gene_name_col: str, optional
    :param strand_col: the name of the column containing the strand for the target
    :type strand_col: str, optional
    :param ref_seq_col: the name of the column containing the reference sequence for the insert
    :type ref_seq_col: str, optional
    :param target_attributes_col: the name of the column containing the classification type(s) for the primer target
    :type target_attributes_col: str, optional
    :param target_attributes_col_delimiter: the delimiter used to split the target attributes column into multiple attributes. Default is a comma.
    :type target_attributes_col_delimiter: str
    :param genome_id_col: the name of the column containing the genome ID (default is 0)
    :type genome_id_col: str, optional
    :param additional_target_info_cols: a list of additional column names to copy verbatim into each target information dictionary
    :type additional_target_info_cols: list, optional
    :raises ValueError: if target_table is not a DataFrame, or if any target
        carries location information while genome_info was not supplied
    :return: a dict of the panel information with ``panel_info`` and
        ``target_info`` keys, plus ``targeted_genomes`` when genome_info is given
    :rtype: dict
    """

    if not isinstance(target_table, pd.DataFrame):
        raise ValueError("target_table must be a pandas DataFrame.")

    # Check additional columns if any are added
    check_additional_columns_exist(target_table, additional_target_info_cols)
    builder = PMOPanelBuilder(
        target_table=target_table,
        panel_name=panel_name,
        target_name_col=target_name_col,
        forward_primers_seq_col=forward_primers_seq_col,
        reverse_primers_seq_col=reverse_primers_seq_col,
        reaction_name_col=reaction_name_col,
        reaction_name_col_delimiter=reaction_name_col_delimiter,
        forward_primers_start_col=forward_primers_start_col,
        forward_primers_end_col=forward_primers_end_col,
        reverse_primers_start_col=reverse_primers_start_col,
        reverse_primers_end_col=reverse_primers_end_col,
        insert_start_col=insert_start_col,
        insert_end_col=insert_end_col,
        chrom_col=chrom_col,
        strand_col=strand_col,
        ref_seq_col=ref_seq_col,
        gene_name_col=gene_name_col,
        target_attributes_col=target_attributes_col,
        target_attributes_col_delimiter=target_attributes_col_delimiter,
        additional_target_info_cols=additional_target_info_cols,
    )

    # Normalise genome_info to a list so the rest of the function has one shape
    if genome_info and isinstance(genome_info, dict):
        genome_info = [genome_info]

    if genome_info:
        check_genome_info(genome_info)

    # Create a dictionary of targets and panels
    targets_dict = builder.build_target_info_dict(genome_id_col)
    if not genome_info:
        # Locations reference a genome by id, so they require genome_info.
        # ValueError subclasses Exception, so existing handlers keep working.
        for target in targets_dict:
            located_fields = []
            if "insert_location" in target:
                located_fields.append("insert_location")
            if "location" in target["forward_primer"]:
                located_fields.append("forward primer location")
            if "location" in target["reverse_primer"]:
                located_fields.append("reverse primer location")
            if located_fields:
                raise ValueError(
                    f"{located_fields[0]} is provided for {target['target_name']} "
                    "but no genome_info was provided."
                )
    panel_dict = builder.build_panel_info_dict(targets_dict)
    # Put together components
    panel_info_dict = {
        "panel_info": [panel_dict],
        "target_info": targets_dict,
    }
    remove_optional_null_values(
        panel_info_dict["target_info"],
        ["gene_name", "insert_location", "markers_of_interest", "target_attributes"],
    )

    if genome_info:
        # Copy so that cleaning the output cannot alter the caller's genome dicts.
        panel_info_dict["targeted_genomes"] = copy.deepcopy(genome_info)
        # chromosomes / gff_url are optional genome fields, so null-stripping is
        # applied to the genome entries rather than to target_info.
        # NOTE(review): assumes remove_optional_null_values accepts any list of
        # dicts, as its use on target_info above suggests - confirm.
        remove_optional_null_values(
            panel_info_dict["targeted_genomes"], ["chromosomes", "gff_url"]
        )

    return panel_info_dict