Recently, I needed to perform several joins in a SQL query and return that data to a DataGrid. Normally this works great because I only have 2-3 tables I am performing a join on and the number of records being returned is relatively low. In this latest case I had many tables to join together and some of them had millions of rows in them. 
The query to pull all the data I needed would run for several minutes before returning anything to me. I decided that to speed this up I would handle the joins in the application and just pull the rows I needed in the queries themselves. This took a query that would return in 15+ minutes down to less than 1 minute.
Microsoft has some code here:  on how to do this. The code they had was over-engineered for what I needed to do so I decided to write my own. 
The main method takes 2 tables and returns a new table. It is quick and dirty and can still be fleshed out if needed. 
public static class MergeData
    {
        public static DataTable Merge(DataTable tableOne, DataTable tableTwo)
        {
            DataTable output;
            output = new DataTable();
            CopyColumns(tableOne, ref output);
            CopyColumns(tableTwo, ref output);
            CopyData(tableOne, tableTwo, ref output);
            return output;
        }
        public static void CopyData(DataTable tableOne, DataTable tableTwo, ref DataTable destinationTable)
        {
            foreach (DataRow row in tableOne.Rows)
            {
                DataRow nr;
                DataRow[] joinedRows;
                nr = destinationTable.NewRow();
                foreach (DataColumn dc in tableOne.Columns)
                {
                    nr[dc.Table.TableName + "." + dc.ColumnName] = row[dc.ColumnName];
                }
                string selectStatement = tableTwo.Columns[1].ColumnName + " = '" + row[tableOne.Columns[1].ColumnName] + "'";
                joinedRows = tableTwo.Select(selectStatement);
                foreach (DataColumn dc in tableTwo.Columns)
                {
                    nr[dc.Table.TableName + "." + dc.ColumnName] = joinedRows[0][dc.ColumnName];
                }
                destinationTable.Rows.Add(nr);
            }
        }
        
        public static void CopyColumns(DataTable table, ref DataTable output)
        {
            foreach (DataColumn dc in table.Columns)
            {
                DataColumn nc;
                nc = new DataColumn(table.TableName + "." + dc.ColumnName, dc.DataType);
                AddColumn(nc, ref output);
            }
        }
        public static void AddColumn(DataColumn dc, ref DataTable dt)
        {
            if (!dt.Columns.Contains(dc.ColumnName))
            {
                dt.Columns.Add(dc);
            }
        }
    }
Wednesday, May 6, 2009
Subscribe to:
Comments (Atom)
 
 
 Posts
Posts
 
