Raw ATAC Sample Snippet
Raw ATAC Sample Snippet
This page preserves an existing exploratory code snippet from the repository. It is included in the docs navigation as a low-level developer reference rather than polished end-user guidance.
import pandas as pd
def parse_column(row):
    """Parse an encoded ``key|read_barcode&...;`` string into a DataFrame.

    The input is split into records on ``;``. Each record is split on ``|``
    into a key and a value list (only the field right after the first ``|``
    is used). The value list is split on ``&`` and every item is split on
    ``_``. Items that split into exactly two parts produce one output row
    ``[key, first_part, second_part]``; every other item is skipped. Each
    ``-`` in the first part is replaced with ``:``.

    Args:
        row: The encoded string. Missing values (``None``, ``pd.NA`` or a
            float NaN) are treated as "no records" instead of failing on
            ``.split``.

    Returns:
        pandas.DataFrame with columns ``Allele_index``, ``Read_name`` and
        ``Barcode``; it has zero rows (but the same columns) when nothing
        could be parsed.
    """
    columns = ["Allele_index", "Read_name", "Barcode"]

    # A missing cell is not a string and has no .split(); return an empty
    # frame with the expected columns so downstream filtering still works.
    if row is None or row is pd.NA or (isinstance(row, float) and row != row):
        return pd.DataFrame([], columns=columns)

    result = []
    # Records are separated by semicolons.
    for record in row.split(";"):
        if not record:  # skip empty records (e.g. after a trailing ";")
            continue
        # The key and its value list are separated by "|".
        parts = record.split("|")
        key = parts[0]
        values = parts[1] if len(parts) > 1 else ""
        # Individual entries in the value list are separated by "&".
        for sub_record in values.split("&"):
            # Each entry is expected to be "<read>_<barcode>".
            sub_parts = sub_record.split("_")
            if len(sub_parts) != 2:
                continue
            read_name, barcode = sub_parts
            # "-" in the read part is rewritten to ":" for the output.
            result.append([key, read_name.replace("-", ":"), barcode])

    return pd.DataFrame(result, columns=columns)
# --- Input locations for one sample -----------------------------------------
sample = "D19-8606"
root = "/storage/douyanmeiLab"
bam_path = f"{root}/wangchunyi/callSTR/AD_scATAC/Bam/{sample}.possorted_bam.bam"
CB_path = f"{root}/wangchunyi/callSTR/AD_scATAC/CellAnnotation/union/{sample}_cell_barcode_union.txt"
vcf_path = f"{root}/wangchunyi/callSTR/AD_scATAC/bulkmonstr/06prediction/bulkmonstr_prediction_output/{sample}/{sample}_chr1_1_1000000_prediction_output.txt"

# --- Load the prediction table and the barcode list -------------------------
vcf = pd.read_csv(vcf_path)
# The barcode file has no header row; its single column is named "Barcode".
CB_list = pd.read_csv(CB_path, header=None, names=["Barcode"])

# --- Parse the encoded column for one STR -----------------------------------
# Take the first row whose str_id matches and read its encoded column.
target_rows = vcf[vcf["str_id"] == "Human_STR_32"]
row = target_rows.iloc[0]["ALLELE_BARCODE_MOSAIC"]
result_df = parse_column(row)
print(result_df)

# Keep only parsed rows whose barcode appears in the barcode list.
in_barcode_list = result_df["Barcode"].isin(CB_list["Barcode"])
result_df_filtered = result_df[in_barcode_list]
print(result_df_filtered)

# Distinct barcodes that survived the filter.
mutant_cell = result_df_filtered["Barcode"].unique()
print(mutant_cell)

# Every listed barcode that is not among the ones found above.
is_mutant = CB_list["Barcode"].isin(mutant_cell)
non_mutant_cell = CB_list[~is_mutant]["Barcode"].to_list()
print(non_mutant_cell)
reference_start_index_0_based_include,reference_end_index_0_based_include,reference_start_coordinate_1_based_include,reference_end_coordinate_1_based_include