23andme¶

23andme to PED-MAP¶

# Step 1: Convert 23andme to BED-BIM-FAM --> 23andmetoBED-BIM-FAM()
# That function generates three files: 23andmetoBED.bed, 23andmetoBED.bim, and 23andmetoBED.fam
# Step 2
./plink --bfile 23andmetoBED --recode --out output_file

23andme to RAW¶

# Step 1: Convert 23andme to BED-BIM-FAM --> 23andmetoBED-BIM-FAM()
# That function generates three files: 23andmetoBED.bed, 23andmetoBED.bim, and 23andmetoBED.fam
# Step 2
./plink --bfile 23andmetoBED --recodeA --out output_file

23andme to BED-BIM-FAM¶

# Convert every 23andme genotype file in a directory into PLINK binary
# filesets and merge them into 23andmetoBED.bed / .bim / .fam.
#
# Place the 23andme files in a new directory first.
# `input_file` is actually the input *directory*, not a single file.
# Names expected in scope: `os`, pandas as `pd`, and `input_file`.
# The `plink` executable is invoked as ./plink from the working directory.

MERGE_LIST_NAME = "All.txt"


def _sample_prefix(file_name):
    """Return the sample id: the text before the first '.' and first '_'."""
    return file_name.split(".")[0].split("_")[0]


def _write_merge_list(base):
    """Write the PLINK merge list for the samples found under *base*.

    Every ``.txt`` input whose sample has a ``.fam`` file contributes one
    ``<bed> <bim> <fam>`` line to ``All.txt`` inside *base*. Samples with no
    ``.fam`` file are left out and reported once after the scan.

    Returns the list of lines written (without trailing newlines).
    """
    entries = []
    missing = 0
    for name in os.listdir(base):
        # The merge list itself is a .txt file; it is not a sample.
        if name == MERGE_LIST_NAME:
            continue
        if ".txt" not in name:
            continue
        if ".bed" in name or ".fam" in name or ".bim" in name:
            continue
        print(name)
        # Same prefix rule as the conversion loop, so the .fam lookup targets
        # exactly the file that plink was asked to write.
        prefix = base + _sample_prefix(name)
        if os.path.exists(prefix + ".fam"):
            entries.append(prefix + ".bed " + prefix + ".bim " + prefix + ".fam")
        else:
            missing += 1
    print(missing, " People removed due to missing fam file")
    with open(base + MERGE_LIST_NAME, "w") as handle:
        handle.writelines(entry + "\n" for entry in entries)
    return entries


if not os.path.isdir(input_file):
    print("Directory "+input_file+" does not exists...! Kindly place 23andme files in that directory.")
    # NOTE(review): this is an error path but the exit status has always been
    # 0; kept unchanged so existing wrappers behave the same.
    raise SystemExit(0)

base_dir = "./" + input_file + os.sep
personname = []
sexinfo = "0"

# Pass 1: one PLINK fileset per 23andme file, then patch its .fam file.
for loop in os.listdir(base_dir):
    if ".23andme" not in loop:
        continue

    data = loop.split(".")[0].split("_")
    sample = data[0]
    personname.append(sample)
    print(data)

    # The sixth '_'-separated field of the file name carries XX / XY.
    # File names with fewer fields fall back to "0" instead of crashing.
    karyotype = data[5] if len(data) > 5 else ""
    sexinfo = {"XX": "2", "XY": "1"}.get(karyotype, "0")

    os.system("./plink --23file " + base_dir + loop + " --snps-only --make-bed  --out " + base_dir + sample)

    fam_path = base_dir + sample + ".fam"
    if os.path.exists(fam_path):
        # Raw string: "\s" is not a valid escape in a normal string literal.
        fam = pd.read_csv(fam_path, header=None, sep=r"\s+")
        fam[0] = sample       # column 0 <- sample id
        fam[1] = sample       # column 1 <- sample id
        fam[4] = sexinfo      # column 4 <- sex code derived above
        fam.to_csv(fam_path, sep="\t", header=False, index=False)

# Pass 2: merge all per-sample filesets.
merge_command = "./plink --merge-list " + base_dir + MERGE_LIST_NAME + " --make-bed --out 23andmetoBED"
files = _write_merge_list(base_dir)
os.system(merge_command)

if os.path.exists("23andmetoBED.bed"):
    raise SystemExit(0)

# The merged .bed was not produced. Drop the variants listed in
# 23andmetoBED-merge.missnp (presumably written by the failed merge -- confirm
# against the PLINK log) from every fileset, then retry the merge once.
for loop in os.listdir(base_dir):
    if ".bed" in loop:
        prefix = base_dir + loop.split(".")[0]
        command = "./plink --bfile " + prefix + " --exclude 23andmetoBED-merge.missnp --make-bed --out " + prefix
        os.system(command)

files = _write_merge_list(base_dir)
os.system(merge_command)

23andme to GEN-SAMPLE¶

# Step 1: Convert 23andme to BED-BIM-FAM --> 23andmetoBED-BIM-FAM()
# That function generates three files: 23andmetoBED.bed, 23andmetoBED.bim, and 23andmetoBED.fam
# Step 2
./plink --bfile 23andmetoBED  --export oxford --out output_file

23andme to VCF¶

# Step 1: Convert 23andme to BED-BIM-FAM --> 23andmetoBED-BIM-FAM()
# That function generates three files: 23andmetoBED.bed, 23andmetoBED.bim, and 23andmetoBED.fam
# Step 2
./plink --bfile 23andmetoBED --recode vcf --out output_file

23andme to AncestryDNA¶

# Rewrite each 23andme file found in the `input_file` directory as a
# tab-separated table (Rsid, Chromosome, position, allele1, allele2) saved
# under ./AncestryDNA. Names expected in scope: `os`, pandas as `pd`,
# and `input_file` (the input directory).

# Output directory for the converted files; created on first use.
if not os.path.isdir("AncestryDNA"):
    os.mkdir("AncestryDNA")

source_dir = "./" + input_file
target_dir = "./AncestryDNA"

# Chromosome codes rewritten in the output, applied in this order.
# NOTE(review): these match integer codes only; if the column is parsed as
# strings nothing is replaced -- confirm against real input files.
chromosome_labels = ((23, 'X'), (24, 'Y'), (25, 'XY'), (26, 'MT'))

_23andmefiles = os.listdir(source_dir)

# Handle the files one at a time.
for filename in _23andmefiles:
    # Only "...23andme.txt" inputs are converted; anything marked "temp" is skipped.
    if "23andme.txt" not in filename or "temp" in filename:
        continue

    source_path = source_dir + os.sep + filename

    # Empty files have nothing to convert.
    if os.stat(source_path).st_size == 0:
        continue

    print(filename)
    data = pd.read_csv(source_path, sep="\t", comment='#', header=None, low_memory=False)

    # Column 3 holds the genotype string; its first two characters become
    # allele1 and allele2.
    genotype = data[3]
    new = pd.DataFrame({
        'Rsid': data[0].values,
        'Chromosome': data[1].values,
        'position': data[2].values,
        'allele1': genotype.str[0],
        'allele2': genotype.str[1],
    })

    for code, label in chromosome_labels:
        new['Chromosome'] = new['Chromosome'].replace(code, label)

    # The output keeps the input name with "23andme" swapped for "ancestry".
    target_name = filename.replace("23andme", "ancestry")
    new.to_csv(target_dir + os.sep + target_name, sep="\t", index=False)

23andme to HAPS-LEGEND-SAMPLE¶

# Step 1: Convert 23andme to BED-BIM-FAM --> 23andmetoBED-BIM-FAM()
# That function generates three files: 23andmetoBED.bed, 23andmetoBED.bim, and 23andmetoBED.fam
# Step 2
./plink --bfile 23andmetoBED --recode vcf --out output_file

# Step 3
bcftools convert output_file.vcf  -h  output_file2