2

How can i avoid duplicate data when inserting from CSV file into my SQL server 2008 ?

 #region Put to SQL
                string line = null;
                bool IsFirst = true;

                string SqlSyntax = "INSERT INTO ORDRE ";
                string sqlkey = "";
                string sqlvalSELECT = "";

                using (StreamReader sr = File.OpenText(filePath + "\\" + downloadname))
                {
                    while ((line = sr.ReadLine()) != null)
                    {
                        string[] data = line.Split(';');

                        if (!String.IsNullOrEmpty(sqlvalSELECT)) sqlvalSELECT += "\nUNION ALL ";

                        if (data.Length > 0)
                        {
                            string sqlval = "";
                            foreach (object item in data)
                            {
                                if (IsFirst)
                                {
                                    if (!String.IsNullOrWhiteSpace(sqlkey)) sqlkey += ",";
                                    sqlkey += item.ToString();

                                }
                                else
                                {
                                    if (!String.IsNullOrEmpty(sqlval)) sqlval += ",";
                                    sqlval +=  item.ToString();
                                }
                            }
                            if (!String.IsNullOrEmpty(sqlval)) sqlvalSELECT += "SELECT " + sqlval;

                            IsFirst = false;

                        }
                    }
                }


                string sqlTOTAL = SqlSyntax + "(" + sqlkey + ")" + sqlvalSELECT;
                //lbl_Message.Text = sqlTOTAL;

                try
                {
                    using (var connectionWrapper = new Connexion())
                    {
                        var connectedConnection = connectionWrapper.GetConnected();
                        SqlCommand comm_Ftp_Insert = new SqlCommand(sqlTOTAL, connectionWrapper.conn);
                        comm_Ftp_Insert.ExecuteNonQuery();

                    }
                }
                catch (Exception ex)
                {
                    throw new Exception(ex.Message);
                }

                #endregion

i make the methode to collect the data that already imported into SQL Server 2008. how can i compare this to the CSV file ?

 /// <summary>
        /// Get the existed data on SQL
        /// </summary>
        /// <returns>Return List of Pers_Ordre with key OrdreId and ClientID</returns>
        public List<Pers_Ordre> Get_Existed()
        {
            try
            {
                using (var connectionWrapper = new Connexion())
                {
                    var connectedConnection = connectionWrapper.GetConnected();
                    List<Pers_Ordre> oListOdr = new List<Pers_Ordre>();

                    string sql_Syntax = Outils.LoadFileToString(HttpContext.Current.Server.MapPath("~/SQL/OrdreFTP_GetExist.sql"));
                    SqlCommand comm_Command = new SqlCommand(sql_Syntax, connectionWrapper.conn);

                    SqlDataReader readerOne = comm_Command.ExecuteReader();
                    while (readerOne.Read())
                    {
                        Pers_Ordre oPersOrdre = new Pers_Ordre();
                        oPersOrdre.OrdreId = Convert.ToInt32(readerOne["NO_ORDRE"]);
                        oPersOrdre.ClientID = readerOne["CODE_CLIENT"].ToString();                   
                        oListOdr.Add(oPersOrdre);
                    }
                    return oListOdr;
                }
            }
            catch (Exception excThrown)
            {
                throw new Exception(excThrown.Message);
            }

        }

Thanks you in advance, Stev

2
  • 1
    A few minor suggestions; use Path.Combine(filePath, downloadname) not filePath + "\\" + downloadname. And if there is any way for a user to alter the contents of downloadname your code is vulnerable to SQL injection. Commented Dec 21, 2011 at 18:00
  • There are a few other strange things with this code; if you want to learn more post it to codereview.stackexchange.com. Commented Dec 21, 2011 at 18:16

2 Answers 2

4

Why not just insert the data from the csv into a temporary table and filter what you insert into the destination table to remove duplicate lines. That way you can let the database do the work which will be quicker anyway.

This is the easiest sql for what you need

insert into Order
  select * from Order_Temp
  WHERE NOT EXISTS
  (
    SELECT X
    FROM Order o
    WHERE o.NO_ORDRE = Order_Temp.NO_ORDRE
    AND o.CODE_CLIENT = Order_Temp.CODE_CLIENT
  )

Hope it helps

Sign up to request clarification or add additional context in comments.

5 Comments

you could create a holding table to insert into and then write a proc to select items not in the real table, remember to have this proc TRUNCATE the data in the holding table.
Also, have a look at this stackoverflow.com/questions/83471 it has a few ideas that might help you :)
Could you give an SQL example please ? My Idea i have 2 Table Order and Order_Temp, and then i use INNER JOIN to find duplicate --> result _TEMP . and then i RIGHT JOIN from Result_Temp....Do you have another idea ?
Thanks you for your responds, but i have 2 keys : 'INSERT INTO ORDRE select * from ORDRE_FTP_TEMP where (NO_ORDRE not in(select NO_ORDRE from ORDRE) AND CODE_CLIENT not in(select CODE_CLIENT from ORDRE)) ' it not work with 2 keys
0

You could add unique constraints to the columns in your DB you don't want to duplicate. Then wrap your code in a try {} catch {}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.