Skip to content

Raw ATAC Sample Snippet

Raw ATAC Sample Snippet

This page preserves an existing exploratory code snippet from the repository. It is included in the docs navigation as a low-level developer reference rather than polished end-user guidance.

import pandas as pd

def parse_column(row):
    """Expand one packed allele/read/barcode string into a long-format table.

    The input is read as ``;``-separated records. Each record is split on
    ``|``: the first field becomes the ``Allele_index`` value and the second
    field (treated as empty when absent) holds ``&``-separated
    ``<read name>_<barcode>`` pairs. Any further ``|`` fields are ignored.
    A pair is kept only when splitting it on ``_`` yields exactly two
    pieces; every other pair is dropped silently.

    Args:
        row: The packed string to parse.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Allele_index``,
        ``Read_name`` and ``Barcode``, one row per kept pair in input
        order. Every ``-`` in ``Read_name`` is replaced by ``:``.
    """
    triples = []

    # Empty records (e.g. the one left by a trailing ";") carry no data.
    for entry in filter(None, row.split(";")):
        # First "|" field is the allele key; the second one is the payload.
        allele, *payload = entry.split("|")
        pairs = payload[0] if payload else ""

        for pair in pairs.split("&"):
            pieces = pair.split("_")
            # Only well-formed "<read name>_<barcode>" pairs are kept.
            if len(pieces) != 2:
                continue
            triples.append([allele, *pieces])

    table = pd.DataFrame(triples, columns=["Allele_index", "Read_name", "Barcode"])
    table["Read_name"] = table["Read_name"].str.replace("-", ":")
    return table

# Sample identifier used to build every input path below.
sample = "D19-8606"

# Input locations on the shared storage tree.
root = "/storage/douyanmeiLab"
bam_path = f"{root}/wangchunyi/callSTR/AD_scATAC/Bam/{sample}.possorted_bam.bam"
CB_path = f"{root}/wangchunyi/callSTR/AD_scATAC/CellAnnotation/union/{sample}_cell_barcode_union.txt"
vcf_path = f"{root}/wangchunyi/callSTR/AD_scATAC/bulkmonstr/06prediction/bulkmonstr_prediction_output/{sample}/{sample}_chr1_1_1000000_prediction_output.txt"

# Load the prediction table and the headerless one-column barcode list.
vcf = pd.read_csv(vcf_path)
CB_list = pd.read_csv(CB_path, header=None, names=["Barcode"])

# Take the packed ALLELE_BARCODE_MOSAIC string from the first row whose
# str_id is Human_STR_32 and expand it into one row per read/barcode pair.
_str_rows = vcf.loc[vcf["str_id"] == "Human_STR_32"]
row = _str_rows.iloc[0]["ALLELE_BARCODE_MOSAIC"]
result_df = parse_column(row)
print(result_df)

# Keep only the parsed rows whose barcode appears in the barcode list.
_in_barcode_list = result_df["Barcode"].isin(CB_list["Barcode"])
result_df_filtered = result_df.loc[_in_barcode_list]
print(result_df_filtered)

# Distinct barcodes that survived the filter.
mutant_cell = result_df_filtered["Barcode"].unique()
print(mutant_cell)

# Every listed barcode that is not among the ones found above.
_is_mutant = CB_list["Barcode"].isin(mutant_cell)
non_mutant_cell = CB_list.loc[~_is_mutant, "Barcode"].to_list()
print(non_mutant_cell)

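# NOTE(review): leftover list of field names, kept as a comment because these
# bare, undefined names would raise NameError if executed as code:
# reference_start_index_0_based_include,reference_end_index_0_based_include,reference_start_coordinate_1_based_include,reference_end_coordinate_1_based_include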