"""This is the Python code used to generate the synthetic grain size populations showed in the
manuscript entitled: An evaluation of different measures of dynamically recrystallized grain
size for paleopiezometry or paleowattometry studies by Lopez-Sanchez and Llana-Funez

Requirements: Python 2.7.x or higher; Numpy 1.5 or higher"""

from __future__ import division, print_function # this is to avoid python 2.7.x - 3.x compatibility issues

# import the required modules
import random
import numpy as np

def generateRandomSections(sampleSize, diameter=2):
    """Simulate random planar sections through a sphere of a given diameter.

    For every section a random number x between 0.0 and 1.0 is drawn and
    used as the offset of the cut in a sphere of diameter 2, which gives
    a section length of 2*sqrt(1 - x**2). The lengths are then rescaled
    to the diameter requested. Returns a list with the section lengths.

    INPUTS
    sampleSize: the number of random sections to generate. An integer
    diameter: the diameter of the sphere. It is set to 2 by default.
    """

    # section lengths for the reference sphere (diameter = 2)
    unitSections = [2*np.sqrt(1 - random.random()**2) for _ in range(sampleSize)]

    # nothing to rescale when the reference diameter is requested
    if diameter == 2:
        return unitSections

    # rescale the sections to the selected diameter
    scale = diameter/2.0
    return [section*scale for section in unitSections]
    
def generateRandomSections_withUncertaninty(sampleSize, diameter=2, uncertainty=0.04):
    """Simulate random sections of a sphere and add a measurement error.

    The sections are generated as random cuts through a sphere of the
    given diameter. Each section that is not smaller than the absolute
    uncertainty (diameter*uncertainty) is shifted by a normally
    distributed error (mean 0, standard deviation 0.3) scaled by the
    absolute uncertainty. Values that are not above the absolute
    uncertainty are discarded, the remaining sample size is printed and
    the surviving values are returned as a list.

    INPUTS
    sampleSize: the number of sections generated before filtering. An integer
    diameter: the diameter of the sphere. It is set to 2 by default.
    uncertainty: the relative uncertainty of the data, a float number
    between 0.0 and 1.0. It is 0.04 by default.
    """

    # random sections through the reference sphere (diameter = 2)
    sections = [2*np.sqrt(1 - random.random()**2) for _ in range(sampleSize)]

    # rescale the sections to the selected diameter
    if diameter != 2:
        scale = diameter/2.0
        sections = [section*scale for section in sections]

    # absolute uncertainty, also used as the detection threshold
    threshold = diameter*uncertainty

    perturbed = []
    for section in sections:
        # sections already below the threshold are dropped without drawing an error
        if section < threshold:
            continue
        # normal error (mean 0, standard deviation 0.3) scaled to the absolute uncertainty
        noise = np.random.normal(loc=0.0, scale=0.3)*threshold
        perturbed.append(section + noise)

    # discard the values pushed to or below the threshold by the error;
    # simulates the optical or resolution limitations of the applied technique
    clean_dataset = [value for value in perturbed if value > threshold]

    print (' ')
    print ('The sample size after removing values below the absolute uncertainty is: ', len(clean_dataset))
    print (' ')

    return clean_dataset
    
    
def generatebimodalSample(popA, popB, sampleSize, ratio):
    """Generates an artificial sample with two populations of grain size
    in different proportion and saves it in a text file.

    Each section is taken from population A with a probability equal to
    the volume fraction of population A, and from population B otherwise.
    The section itself is a random cut of a sphere of the corresponding
    grain size. The dataset is written, one value per line, to the file
    'bimodsample.txt' (a relative path, so it is created in the current
    working directory). The function returns None.

    INPUTS
    popA: the grain size of the population A, an integer
    popB: the grain size of the population B, an integer
    sampleSize: the size of the total population. An integer
    ratio: the ratio between the two populations, a float. If ratio = 0.8
    it means that population-A represents 80 percent -in number- of the total
    population.
    """

    samplelist = []

    # calculate volumes
    volumePopA = (4/3.*np.pi*popA**3)*float(sampleSize*ratio)
    volumePopB = (4/3.*np.pi*popB**3)*float(sampleSize*(1-ratio))
    volumeTot = volumePopA + volumePopB

    if sampleSize == 0:
        # Empty sample: both volumes are zero and the fraction would be 0/0.
        # The value is never used because the sampling loop does not run,
        # so report 0.0 instead of raising ZeroDivisionError.
        volPopA = 0.0
    else:
        volPopA = volumePopA/volumeTot # per unit volume of population A
    print ('volumen of PopA =', volPopA)
    print (' ')

    # simulates the intersection probability and the cut-section effects
    for _ in range(sampleSize):
        randomNumber = random.random() # random floating point number between 0.0 and 1.0
        # the grain hit belongs to population A with probability volPopA
        grainSize = popA if randomNumber <= volPopA else popB
        samplelist.append(generateRandomSections(1, diameter=grainSize)[0])

    # create the file; the context manager closes it even if a write fails
    with open('bimodsample.txt', 'w') as dataFile:
        dataFile.writelines(str(item)+'\n' for item in samplelist)

    print('The file bimodsample.txt was created')

    return None
    
    
def generateSample_withOutliers(grainSize, sampleSize, ratio):
    """Generate an artificial sample with a user-defined quantity of
    outliers and save it in a text file.

    The regular measures are random sections of a sphere of diameter
    grainSize. Each outlier is a random section of a sphere whose
    diameter is between 1.01 and 1.5 times grainSize (steps of 0.01,
    chosen at random). The dataset is written, one value per line, to
    the file 'outliersample.txt' (a relative path, so it is created in
    the current working directory). The function returns None.

    INPUTS
    grainSize: the grain size, an integer.
    sampleSize: the sample size, an integer.
    ratio: the ratio between the correct measures and the outliers. A float
           between 0.0 and 1.0. If ratio = 0.8 it means that outliers represents
           20 percent of the population.
    """
    # create the population without outliers
    sampleSize_A = int(round(sampleSize*ratio))
    samplelist = generateRandomSections(sampleSize_A, diameter=grainSize)

    # The outliers take the remainder of the sample. Rounding both groups
    # independently can lose or add a value (e.g. sampleSize=5, ratio=0.5
    # rounds to 2 + 2 under round-half-to-even), so the total would not
    # match sampleSize.
    sampleSize_B = int(round(sampleSize)) - sampleSize_A

    # size factors of the outliers: 1.01, 1.02, ..., 1.50. They are built from
    # integers because the length of np.arange with a float step depends on rounding.
    values = np.arange(101, 151)/100.0

    for _ in range(sampleSize_B):
        outlierDiameter = random.choice(values)*grainSize # generate the actual diameter of the outlier
        outlierSection = generateRandomSections(sampleSize=1, diameter=outlierDiameter) # generate the random section of the outlier
        samplelist.append(outlierSection[0])

    # create the file; the context manager closes it even if a write fails
    with open('outliersample.txt', 'w') as dataFile:
        dataFile.writelines(str(item)+'\n' for item in samplelist)

    print('The file outliersample.txt was created')

    return None
    
def generateSample_withUncertainty(grainsize, sampleSize, uncertainty):
    """Generates an artificial sample simulating uncertainty during the
    measure of the raw data and saves it in a text file.

    The dataset is produced by generateRandomSections_withUncertaninty and
    written, one value per line, to the file 'uncert_sampleX.txt' (a
    relative path, so it is created in the current working directory).
    Returns a string with a message confirming that the file was created.

    INPUTS
    grainsize: the size of the grains
    sampleSize: the size of the sample
    uncertainty: the maximum measure error expected during the grain delineation.
                 A float number between 0.0 and 1.0
    """

    # create random section with uncertainty
    sample = generateRandomSections_withUncertaninty(sampleSize, grainsize, uncertainty)

    # create the file; the context manager closes it even if a write fails
    with open('uncert_sampleX.txt', 'w') as dataFile:
        dataFile.writelines(str(item)+'\n' for item in sample)

    return 'The file uncert_sampleX.txt was created'