Counter of Festivals

Ashok Blog for SQL Learners and Beginners and Experts

Sunday 24 December 2017

how to configure Distributed AG in SQL 2016?

Configure  Distributed AG in SQL 2016

Before configure DAG

Let look AG should need WSFC and Local drive 



Distributed AG Configuration Pre-requisites :
=============================================

1. .Net framework 3.5 and failover cluster in server manager need to be installed to perform the SQL server installation.
2. Database Data and log drive same path on primary and secondary server to configure the DAG.
3. Enable AG in configuration manager.
4. SQL Service mandatory to run with Service account.
5. DB recovery mode : Full
6. One Full DB backup.
7. One Tlog backup.

Note : These full and log not required to be restored on the secondary, its just the backup to meet the prerequisite.
after DAG configuration, automatically DB seeds the DB in secondary server.


Steps to configure DAG by scripts.
===================================


-- execute below script in all replicas to Create endpoint on all Availability Group replicas.
==============================================================================================

USE [master]
GO

CREATE ENDPOINT [Hadr_endpoint]
   STATE=STARTED
   AS TCP (LISTENER_PORT = 5022, LISTENER_IP = ALL)
   FOR DATA_MIRRORING (ROLE = ALL, AUTHENTICATION = WINDOWS NEGOTIATE
      , ENCRYPTION = REQUIRED ALGORITHM AES)
GO



GRANT CONNECT ON ENDPOINT::[Hadr_endpoint] TO [ST\serviceaccount]


-- Create AG group in First replica.
====================================

-- TCP://Servername.TEST.COM:5022

create AVAILABILITY GROUP AG_P_01
FOR DATABASE [DB_Admin]
REPLICA ON N'Servername' WITH (ENDPOINT_URL = N'TCP://Servername.TEST.COM:5022',
    FAILOVER_MODE = AUTOMATIC,
    AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
    BACKUP_PRIORITY = 50,  
    SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
    SEEDING_MODE = AUTOMATIC),  
N'Servername' WITH (ENDPOINT_URL = N'TCP://Servername.TEST.COM:5022',  
    FAILOVER_MODE = AUTOMATIC,  
    AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
    BACKUP_PRIORITY = 50,  
    SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
    SEEDING_MODE = AUTOMATIC);  
GO


-- execute below script to Join AG group 01 in second replica
=============================================================

ALTER AVAILABILITY GROUP AG_P_01 JOIN  

ALTER AVAILABILITY GROUP AG_P_01 GRANT CREATE ANY DATABASE
GO


-- execute below script in first replica to create a listener between first and second replica.
===============================================================================================

ALTER AVAILABILITY GROUP [AG_P_01]  
    ADD LISTENER 'AG_L_01' (
        WITH IP ( (N'Listener_ip',N'255.255.255.192') ) ,
        PORT = 1433);  
GO


-- execute below script to create AG02 in third replica.
========================================================

-- TCP://ServerName.TEST.COM:5022

CREATE AVAILABILITY GROUP [AG_P_02]  
FOR  
REPLICA ON N'ServerName' WITH (ENDPOINT_URL = N'TCP://ServerName.TEST.COM:5022',  
    FAILOVER_MODE = MANUAL,  
    AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
    BACKUP_PRIORITY = 50,  
    SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
    SEEDING_MODE = AUTOMATIC),  
N'ServerName' WITH (ENDPOINT_URL = N'TCP://ServerName.TEST.COM:5022',  
    FAILOVER_MODE = MANUAL,  
    AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
    BACKUP_PRIORITY = 50,  
    SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
    SEEDING_MODE = AUTOMATIC);  
GO

-- execute below script to join AG 02 to fourth replica.
========================================================


ALTER AVAILABILITY GROUP [AG_P_02] JOIN


ALTER AVAILABILITY GROUP [AG_P_02] GRANT CREATE ANY DATABASE
GO



-- Execute below script to create the AG listner for third and fourth replica.
==============================================================================

ALTER AVAILABILITY GROUP [AG_P_02]  
    ADD LISTENER 'AG_L_02' (
        WITH IP ( (N'Listner_2_ip',N'255.255.255.192') ) ,
        PORT = 1433);  
GO


-- Execute below script in first replica to create the Distributed AG between two listners.
==========================================================================================

CREATE AVAILABILITY GROUP [DistributedAG]
   WITH (DISTRIBUTED)  
   AVAILABILITY GROUP ON
      'AG_P_01' WITH  
      (  
         LISTENER_URL = 'TCP://AG_L_01.TEST.COM:5022',  
         AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,  
         FAILOVER_MODE = MANUAL,  
         SEEDING_MODE = AUTOMATIC  
      ),  
      'AG_P_02' WITH  
      (  
         LISTENER_URL = 'TCP://AG_L_02.TEST.COM:5022',  
         AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,  
         FAILOVER_MODE = MANUAL,  
         SEEDING_MODE = AUTOMATIC  
      );  
GO  

-- Execute below script in third replica to join the DAG to First replica.
=========================================================================

ALTER AVAILABILITY GROUP [DistributedAG]  
   JOIN  
   AVAILABILITY GROUP ON
      'AG_P_01' WITH  
      (  
         LISTENER_URL = 'TCP://AG_L_01.TEST.COM:5022',  
         AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,  
         FAILOVER_MODE = MANUAL,  
         SEEDING_MODE = AUTOMATIC  
      ),  
      'AG_P_02' WITH  
      (  
         LISTENER_URL = 'TCP://AG_L_02.TEST.COM:5022',  
         AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,  
         FAILOVER_MODE = MANUAL,  
         SEEDING_MODE = AUTOMATIC  
      );  
GO




Hints for troubleshooting, if any issue with DAG:
==================================================

1. check endpoints created with proper permission,
2. NTauthority \ system should have sysadmin permission.
3. service account should have access to endpoints.
4 check the enpoint started status.
5. check the hostname @@server name,
6. telnet and check the 5022 port to other replicas.
7. AG listner name limitation on number of characters.
8. open the master key and then restore the DBs.
9. if any restoration not progress for any db, close all the connection and initiate again.


SQL Server 2016 introduced a new feature called Distributed Availability Group. A Distributed Availability Group is a special type of Availability Group that spans two separate Availability Groups. You can look at it as an “Availability Group of Availability Groups”. The underlying Availability Groups are configured on two different Windows Server Failover Clustering (WSFC) clusters. This makes it a viable solution for the scenario described in the problem statement.
A high-level diagram of a Distributed Availability Group is shown below.
SQL Server Distributed Availability Groups
Distributed Availability Groups solve a lot of challenges with the traditional Availability Groups.
  • Disaster recovery solution for multi-site deployments. In the past, you need to deploy a geographically stretched WSFC in order to have secondary replicas in a different data center for disaster recovery. This meant dealing with network configurations like the CrossSubnetDelay and CrossSubnetThreshold failover cluster properties as described in this previous tip to avoid missed heartbeats that can potentially cause the failover cluster to go offline. It also meant choosing the appropriate quorum type and making sure that any issues with the failover cluster nodes on the DR data center does not affect the availability of the SQL Server Availability Group on the production data center. With Distributed Availability Groups, because you have two different WSFC, you treat them separately.
  • Operating System and/or hardware migration. In previous versions of Windows Server, upgrading the operating system or the hardware meant provisioning a new WSFC, copying the configuration and databases, synchronizing them to minimize downtime during the upgrade and performing the cutover. You have to wait until after the OS or hardware migration is complete before you can create a new Availability Group. Your databases are at risk while you configure a high availability solution. And while Windows Server 2016 allows for Cluster OS migration, I still prefer a clean install on a new hardware or virtual machine for OS upgrades. Distributed Availability Groups allow you to create an Availability Group even before performing the cutover, giving your databases immediate high availability immediately after the cutover. You can have different versions of the WSFC operating system so long as you have the same version of SQL Server (2016 and higher only).
  • Scaling out readable secondary replicas. Traditional Availability Groups allow for one primary and eight secondary replicas (from SQL Server 2014 and higher). With two Availability Groups, you get a total of eighteen (18) potential readable copies of the database – sixteen (16) secondary replicas, the primary replica of the first Availability Group and the primary replica of the second Availability Group. I don’t take this lightly because the licensing cost is not cheap. You have to really think about this before considering implementing Distributed Availability Groups mainly to scale out readable secondary replicas.

Considerations with Distributed Availability Groups

There are several things to consider when deploying Distributed Availability Groups:
  • Metadata will not exist in the WSFC. In addition to seeing the metadata in SQL Server via T-SQL, SSMS or PowerShell, traditional Availability Groups appear as cluster resources in the WSFC – the Availability Group is created as a resource group/role while the listener name is created as a virtual network name with a corresponding virtual IP address.  The screenshot below shows SSMS and the Failover Cluster Manager side-by-side displaying the Availability Group and listener name for a traditional Availability Group.
Failover Cluster Manager Online Status
On the other hand, the metadata for Distributed Availability Groups only exist inside SQL Server. This means that administering a Distributed Availability group is solely the database administrator’s responsibility.
  • Listener name is required. Unlike traditional Availability Groups where you can afford to not have a listener name and just use instance names for client application connectivity, Distributed Availability Groups require a listener name for each of the underlying Availability Group. The listener names are used as endpoints for the synchronization between the Availability Groups. What I don’t like about this is that you won’t be able to use dedicated network adapters for the Availability Group replication traffic. You have to use the network adapter that has connectivity to your DNS servers which is commonly the same as what the client applications use to connect to the database.
  • But the Distributed Availability Group does not have a listener name. Only the underlying Availability Groups require a listener name; the Distributed Availability Group does not. This means read-only routing will not work with Distributed Availability Groups. You have to explicitly point the client applications to the instance names of the readable secondary replicas. If you are using this as a cluster OS upgrade strategy, you will need to rename the listener on the secondary Availability Group after the failover to use the listener on the original primary Availability Group.
  • Endpoint listeners should listen on all IP address. For proper replication between the primary and secondary Availability Groups, the endpoints should be configured to listen to all IP addresses: LISTENER_IP = ALL. Be sure to use this parameter when creating the endpoints. Otherwise, replication between the primary and secondary Availability Group will fail.
  • Only one read-write copy of the database. Don’t be fooled – you can’t do load-balancing of read-write workloads. Just because you see two Availability Groups doesn’t mean you get two primary read-write replica databases. You only have one read-write copy of the database. The other primary replica on the secondary Availability Group functions similar to a distributor in a replication topology – it only receives transaction log records from the primary replica of the primary Availability Group and sends them to the other secondary replicas of the secondary Availability Group.
  • Efficient replication traffic between Availability Groups. With traditional Availability Groups, one primary replica is responsible for sending all the transaction log records to all the secondary replicas. If, let’s say, you have four (4) secondary replicas in your DR data center, the primary replica will have to send the same set of transaction log records four (4) times across the network. As mentioned in the previous item, with Distributed Availability Groups, the primary replica of the primary Availability Group will only send the transaction log records to the primary replica of the secondary Availability Group. Transaction log records are only sent once instead of four (4) times.
  • Only manual failover is supported. Because you have different WSFCs, there is no single coordinator that can handle automatic failover.
  • Currently limited to two (2) Availability Groups. A Distributed Availability Group is limited to two Availability Groups. However, an Availability Group can be a member of more than one Distributed Availability Group. You can think of this as chaining Availability Groups.

Step #1: Setup and Implement SQL Server 2016 Always On Distributed Availability Groups

The prerequisites for deploying Distributed Availability Groups are no different from traditional Availability Groups. For the scenario described where the secondary Availability Group will be used as a DR solution, below are the details of the implementation.
 ProductionDR
WSFCOS: Windows Server 2016OS: Windows Server 2016
 Nodes: WSFC-DC1-NODE1 and WSFC-DC1-NODE2Nodes: WSFC-DC2-NODE1 and WSFC-DC2-NODE2
 Cluster Name Object: WSFC-DC1Cluster Name Object: WSFC-DC2
 IP Subnet: 172.16.0.0/16IP Subnet: 192.168.0.0/24
Availability GroupName: AG_DC1Name: AG_DC2
 Listener: AG_DC1_LISTENERListener: AG_DC2_LISTENER
 Listener IP: 172.16.0.114Listener IP: 192.168.0.116
 Distributed Availability Group Name: DistAG_DC1_DC2 
Here’s a high-level overview of the steps for your reference.
  1. Create the primary Availability Group (AG_DC1) with a corresponding listener name (AG_DC1_LISTENER)
  2. Create the Availability Group endpoint on all the replicas in the secondary Availability Group
  3. Create login and grant the SQL Server service account CONNECT permissions to the endpoint
  4. Create the secondary Availability Group (AG_DC2) with a corresponding listener name (AG_DC2_LISTENER)
  5. Join the secondary replicas to the secondary Availability Group
  6. Create Distributed Availability Group (DistAG_DC1_DC2) on the primary Availability Group (AG_DC1)
  7. Join the secondary Availability Group (AG_DC2) to the Distributed Availability Group
The primary Availability Group AG_DC1 with the corresponding listener name has already been created. Refer to this tip on how to configure a traditional Availability Group.

Step #2: Create Availability Group endpoint on all the replicas in the secondary Availability Group

Use the T-SQL script below to create the endpoint on all of the replicas in the secondary Availability Group. The endpoints have already been created on the primary Availability Group as a side effect of Step #1. Be sure that the endpoint is listening on all IP addresses.
--Create endpoint on all Availability Group replicas
--Run this on the primary replica of the secondary Availability Group
:CONNECT WSFC-DC2-NODE1
USE [master]
GO

CREATE ENDPOINT [Hadr_endpoint]
   STATE=STARTED-
   AS TCP (LISTENER_PORT = 5022, LISTENER_IP = ALL)
   FOR DATA_MIRRORING (ROLE = ALL, AUTHENTICATION = WINDOWS NEGOTIATE
      , ENCRYPTION = REQUIRED ALGORITHM AES)
GO

--Run this on the secondary replica of the secondary Availability Group-
:CONNECT WSFC-DC2-NODE2
USE [master]
GO
CREATE ENDPOINT [Hadr_endpoint]
   STATE=STARTED
   AS TCP (LISTENER_PORT = 5022, LISTENER_IP = ALL)
   FOR DATA_MIRRORING (ROLE = ALL, AUTHENTICATION = WINDOWS NEGOTIATE
       , ENCRYPTION = REQUIRED ALGORITHM AES)
GO  
   

Step #3: Create login and grant the SQL Server service account CONNECT permissions to the endpoint

Because the SQL Server service account will impersonate the SQL Server instance when connecting to the replicas – from primary to secondary Availability group and vice versa – you need to create it as a SQL Server login and grant it the CONNECT permissions to the endpoint. The same SQL Server service account – TESTDOMAIN\sqlservice - is used for all of the Availability Group replicas. Use the T-SQL script below to create the SQL Server service account as a login and grant it CONNECT permissions on the endpoint. Do this on all of the replicas of the secondary Availability Group.
--Create login and grant CONNECT permissions to the SQL Server service account
USE master
GO

CREATE LOGIN [TESTDOMAIN\sqlservice] FROM WINDOWS;
GO

GRANT CONNECT ON ENDPOINT::Hadr_endpoint 
TO [TESTDOMAIN\sqlservice];  
GO
   
Note that if you are using a different SQL Server service account on each instance in the WSFC, you will need to create those as logins on the replicas that they need to connect to as well as grant CONNECT permissions to the endpoints. It becomes a bit more challenging to manage those service accounts. Even more complicated if the two WSFCs are in different Active Directory domains or no Active Directory domain at all. You will need to use certificates as described in this tip.

Step #4: Create the secondary Availability Group with a corresponding listener name

Use the T-SQL script below to create the secondary Availability Group AG_DC2. Be sure you are connected to the SQL Server instance that you want to configure as primary replica of the secondary Availability Group.
--Create second availability group on second failover cluster with replicas and listener
--Run this on the primary replica of the secondary Availability Group
:CONNECT WSFC-DC2-NODE1
CREATE AVAILABILITY GROUP [AG_DC2] 
FOR  
REPLICA ON
N'WSFC-DC2-NODE1' WITH
(
   ENDPOINT_URL = N'TCP://WSFC-DC2-NODE1.TESTDOMAIN.COM:5022',  
   FAILOVER_MODE = AUTOMATIC,  
   AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
   SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
   SEEDING_MODE = AUTOMATIC
),  
N'WSFC-DC2-NODE2' WITH
(  ENDPOINT_URL = N'TCP://WSFC-DC2-NODE2.TESTDOMAIN.COM:5022',  
   FAILOVER_MODE = AUTOMATIC,  
   AVAILABILITY_MODE = SYNCHRONOUS_COMMIT,  
   SECONDARY_ROLE(ALLOW_CONNECTIONS = NO),  
   SEEDING_MODE = AUTOMATIC
)
LISTENER 'AG_DC2_LISTENER'
(
   WITH IP ( ('192.168.0.116','255.255.255.0') ) ,
   PORT = 143
);  
GO  
   
The following parameters and their corresponding values are used to create the Availability Group
  • REPLICA: Notice that I didn’t specify any database when I created the Availability Group.  Addition of the database in the Availability Group is done thru the next parameter
  • SEEDING_MODE = AUTOMATIC: a new parameter in SQL Server 2016 that introduces direct seeding, this allows creation of a database inside an Availability Group without performing the usual backup-copy-restore
  • FAILOVER_MODE = AUTOMATIC: specifies automatic failover of the Availability Group; AUTOMATIC since this is within the Availability Group itself, not the Distributed Availability Group
  • AVAILABILITY_MODE = SYNCHRONOUS_COMMIT: specifies synchronous mode Availability Group replication; again, SYNCHRONOUS_COMMIT since this is within the Availability Group itself, not the Distributed Availability Group
  • SECONDARY_ROLE (ALLOW_CONNECTIONS = NO): specifies secondary replica databases to only be on standby and not used for read-only workloads

Step #5: Join the secondary replicas to the secondary Availability Group

The previous step simply created the Availability Group and defined the replicas. You need to join all the secondary replicas to the Availability Group.
--Join the secondary replicas to the secondary Availability Group
--Run this on the secondary replicas of the secondary Availability Group 
:CONNECT WSFC-DC2-NODE2
ALTER AVAILABILITY GROUP [AG_DC2] JOIN

--Allow the Availability Group to create databases on behalf of the primary replica
ALTER AVAILABILITY GROUP [AG_DC2] GRANT CREATE ANY DATABASE
GO
   
The second statement – ALTER AVAILABILITY GROUP [agname] GRANT CREATE ANY DATABASE – is added because direct seeding was used in the creation of the Availability Group. Should you decide to manually perform the backup-copy-restore process to initialize the database, you can use this command instead:
ALTER DATABASE [dbName] SET HADR AVAILABILITY GROUP = [agName]
Now, because this is still a traditional Availability Group, you should see it as a resource group/role from within the Failover Cluster Manager.
Join the secondary replicas to the secondary Availability Group
Don’t get over-excited just yet. Because you don’t have the Distributed Availability Group yet, the secondary replica on the secondary Availability Group will report as NOT SYNCHRONIZING. That’s because you don’t have any database yet in the Availability Group. A database will be added when you create the Distributed Availability Group.

Step #6: Create Distributed Availability Group on the primary Availability Group

Once the secondary Availability Group has been created, you can now proceed to create the Distributed Availability Group. Use the T-SQL script below to create the Distributed Availability Group DistAG_DC1_DC2. Be sure you are connected to the SQL Server instance that you want to configure as primary replica of the primary Availability Group.
--Create Distributed Availability Group
--Run this on the primary replica of the primary Availability Group
:CONNECT WSFC-DC1-NODE1
CREATE AVAILABILITY GROUP [DistAG_DC1_DC2]  
WITH (DISTRIBUTED)   
AVAILABILITY GROUP ON  
'AG_DC1' WITH    
(   
   LISTENER_URL = 'TCP://AG_DC1_LISTENER.TESTDOMAIN.COM:5022',    
   AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,   
   FAILOVER_MODE = MANUAL,   
   SEEDING_MODE = AUTOMATIC   
),   
'AG_DC2' WITH    
(   
   LISTENER_URL = 'TCP://AG_DC2_LISTENER.TESTDOMAIN.COM:5022',   
   AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,   
   FAILOVER_MODE = MANUAL,   
   SEEDING_MODE = AUTOMATIC   
);    
GO  
   
The following parameters and their corresponding values are used to create the Availability Group
  • DISTRIBUTED: This tells SQL Server that you are creating a Distributed Availability Group
  • LISTENER_URL: Notice that this parameter was used instead of ENDPOINT_URL. This parameter specifies the listener for each Availability Group along with the endpoint of the Availability Group – 5022 - not the endpoint of the listener - 1433.
  • FAILOVER_MODE = MANUAL: specifies automatic failover of the Availability Group; MANUAL since this now the Distributed Availability Group
  • AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT: specifies synchronous mode Availability Group replication; ASYNCHRONOUS_COMMIT since this is now the Distributed Availability Group
Be careful with specifying the LISTENER_URL parameter. When the listener was created, a corresponding Active Directory virtual computer object and a DNS entry are created. I used port 1433 as the listener port number – of course, with a different IP address - to simplify things since this is the same port number as the default SQL Server instance and will likely be opened in your network firewall. However, port 5022 is used to connect to the endpoint for log record synchronization. Since accessing a network resource requires an IP address and a port number, you can think of this as using the listener to find the secondary replica and port 5022 to connect to the endpoint. Be sure to open up port 5022 in your network firewall.

Step #7: Join the secondary Availability Group to the Distributed Availability Group

Once the Distributed Availability Group has been created, you can now proceed to join the secondary Availability Group. Use the T-SQL script below to join the Availability Group AD_DC2 to the Distributed Availability Group DistAG_DC1_DC2. Be sure you are connected to the SQL Server instance that you want to configure as primary replica of the secondary Availability Group.
--Create second availability group on second failover cluster with replicas and listener
--Run this on the primary replica of the secondary Availability Group
:CONNECT WSFC-DC2-NODE1
ALTER AVAILABILITY GROUP [DistAG_DC1_DC2]   
JOIN   
AVAILABILITY GROUP ON  
'AG_DC1' WITH    
(   
   LISTENER_URL = 'TCP://AG_DC1_LISTENER.TESTDOMAIN.COM:5022',    
   AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,   
   FAILOVER_MODE = MANUAL,   
   SEEDING_MODE = AUTOMATIC   
),   
'AG_DC2' WITH    
(   
   LISTENER_URL = 'TCP://AG_DC2_LISTENER.TESTDOMAIN.COM:5022',   
   AVAILABILITY_MODE = ASYNCHRONOUS_COMMIT,   
   FAILOVER_MODE = MANUAL,   
   SEEDING_MODE = AUTOMATIC   
);    
GO     
   
Review the Distributed Availability Group by expanding the Availability Groups folder in SSMS. Notice the word Distributed appended to the Distributed Availability Group.
Review the Distributed Availability Group by expanding the Availability Groups folder in SSMS. Notice the word Distributed appended to the Distributed Availability Group.
Also, notice that there is no resource group/role created in any of the WSFC. As mentioned, the metadata is all stored within SQL Server. In fact, even SSMS does not display the name of the databases in the Distributed Availability Group. You can use the T-SQL script below to view the metadata and status of the Distributed Availability Group.
--View metadata and status of the Distributed Availability Group
SELECT r.replica_server_name, r.endpoint_url,
rs.connected_state_desc, rs.role_desc, rs.operational_state_desc,
rs.recovery_health_desc,rs.synchronization_health_desc,
r.availability_mode_desc, r.failover_mode_desc
FROM sys.dm_hadr_availability_replica_states rs 
INNER JOIN sys.availability_replicas r
ON rs.replica_id=r.replica_id
ORDER BY r.replica_server_name    
   
T-SQL script below to view the metadata and status of the Distributed Availability Group
Implementing Distributed Availability Groups requires proper planning and thorough documentation. It’s not as simple as it is. Use this tip as a guide to help you successfully provide a disaster recovery solution for your SQL Server databases using Distributed Availability Group.

Thursday 30 November 2017

How to Remove Active/Passive node from SQL Server 2016 Failover Cluster with Demo

How to Remove Active/Passive node from SQL Server 2016 Failover Cluster


Active and Passive Node:


Active Passive Node SQL Services will be running on One Node only

that means you should install SQL Server on first Node then you will add second Node to the first Node

so when failover occur it will move to other node


Active/Active or Active/Passive Clustering

SQL Server clusters are often referred to as either being Active/Active or Active/Passive.  When the terminology is used correctly this simply means that either all nodes (servers) participating in a Microsoft SQL Server cluster are dedicated to running at least a single SQL instance (Active-Active) or at least one of these nodes is reserved as a standby to accept failover of a SQL instance if one occurs.  You could very well have an Active-Active-Active-Passive cluster, in the case of my production environment before the very long day I spoke of above; it's really still just classified as Active-Passive.  That's it; quite simple in today's world of convoluted technical terminology.  I do hope you're not disappointed.  Thank you for reading this tip and goodbye...
Ah, but now for the more challenging question of "When to use Active-Active or Active-Passive architecture in your SQL Server environment."  Knowing the terminology only takes you so far.  To answer this question, we need to look at why one would not always use Active-Passive architecture.  Sadly, the answer is typically going to be cost. 

Active-Active Clustering Considerations

When we created our first Microsoft SQL Server 2005 cluster we did so with two nodes; an instance of SQL Server 2005 Enterprise Edition running on each node.  This was also a 64-bit environment and had substantial amounts of RAM (though not nearly enough we would come to find out.)  There was a terabyte of SAN dedicated for the cluster.  This was a pricey consumer of space in our data center.  The purpose of this cluster was not for High Availability (HA) as a Microsoft SQL Server cluster is typically designed, but rather for a mass consolidation effort.  At the time I supported over 60 instances of SQL 2000 and about 10 SQL Server 2005 instances.  We were able to tap into our capital fund for this new cluster in order to hopefully consolidate fifteen of these instances into the two instances that were to be hosted on the cluster.  The purposes of the consolidation effort:  reduced administrative overhead at both the hardware and database layers, upgrade of hardware that had reached the end of their support agreements, reduced licensing costs, and increased productivity for the Database Administrator. 
However, as many of you have experienced in your organizations, many of the vendor-created databases we purchased were not only slow to adopt Microsoft SQL Server 2005, but also slow or unwilling to apply resources in their companies to certify that their databases and applications could run in 8.0 compatibility mode on a SQL 2005 instance.  Furthermore, we even had select vendors tell us that they would not support a database running in a clustered environment.  We ended up with our clustered environment for SQL 2005 hosting only a fraction of our SQL databases from those we targeted for consolidation.  Maximum CPU never exceeded 20% and memory requirements never reached half of the 16gb of RAM installed for each server (node).  Our situation was the same as for many others who choose to cluster in an Active-Active fashion:  we had limited funds available for the clustering initiative, and/or our actual or estimated load on the SQL servers would not consume more than half the resources for each node. 
I think this is the right time to make an important note that has not been discussed to this point:  just because a node in a clustered environment is already hosting a SQL instance (acting as an Active node, in other words) does not preclude it from hosting another SQL instance.  In fact, the server behaves no differently in this respect as it does in a non-clustered environment.  A server can host multiple instances of SQL Server:  a single default instance, and multiple additional named instances.  Your only limits are the edition of Microsoft SQL Server, and of course, system resources.
In our Active-Active configuration we were able to achieve two important goals:  consolidation, to some extent was achieved as was high-availability.  Though we had no passive node in our cluster, we could safely fail over either instance so that both were running on the same physical server and not over-taxing the available resources of the server.  Our SQL instances were configured to use a minimum of 4gb of RAM, and a maximum of 6gb.  These settings reserved memory for the O/S and other non-SQL processes and also ensured that if a failover occurred the server could continue to handle the load of both instances at the levels of utilization we had monitored to date.
If you have the resources available to support the SQL instances in your cluster in this manner, without the need to maintain a passive node, then this architecture may be affective for you.  However, this means that you are most-likely not utilizing the SQL instances to their full potential, or if you are that you've over-purchased hardware or memory for the workload involved.  Essentially, it means you've not spent your money efficiently (unless this was your intention.)  Most organizations end up with Active-Active SQL Server clusters not out of design, but out of circumstance.  They may start out with a SQL Server cluster with a single SQL instance and a passive node ready for failover, but find that they are directed to add a new instance to the cluster for some "hot" project that did not have funding for hardware (been there, bought the refrigerator magnet) or have under-estimated their consolidation effort and are forced to add a new instance to the cluster with the promise of a new passive node the next time funds are made available.  (Hey look, a t-shirt to go with that fridge magnet!)  I do hope I've made my point that Active-Active clustering may have it's role, but it usually means you're forgoing better utilization of your hardware during the majority of time when the SQL instance(s) are running properly.

Active-Passive Clustering Considerations

High-Availability technically means that your SQL instances are available for user consumption of data.  I would prefer to refine this definition and state that the data should not just be readily available, but also responding to user requests at a rate that are consistent with regular performance benchmarks.  This means that your cluster failover process needs to allow for consistency in hardware between the nodes in your cluster. 
Best practices state that all nodes in a cluster should be identical in terms of hardware, memory, and SQL Server editions.  In the event of a failover in a cluster with a passive node, the SQL instance's ownership is switched to the passive node.  After a brief period of downtime, typically around 10 seconds, the SQL instance should be running in an identical state of performance as it did on the original owner node (if the physical specifications for each node are identical.)  You would configure the Microsoft SQL Server 2005 instance to utilize more memory then in an Active-Active architecture since you will only be running a single instance of SQL on a node at any given time. 
I still strongly suggest reserving 4gb of RAM for the O/S and non-SQL operations of the server.  You can add more load to the SQL instance on the active node in an Active-Passive cluster versus a similarly-designed active node in an Active-Active cluster.  Your utilization of resources on the active node will be much more economical as a result.  In our environment we also utilize our passive node to host the resources for the backup volume on our clustered environment.  We utilize SQL native tools to backup to the SAN volume and have a third-party tool that writes these disk backups to tape on a nightly basis.  The overhead associated with this process occurs on the passive node.  In the event of a failover we have the ability to reschedule this process to a low point in SQL activity if we find contention for resources. 
There is an important trade-off for the benefits of high-availability and consistency of performance.  You must purchase and support a server that will hopefully never be used in production.  A final item of note in regards to passive nodes:  Microsoft does not require you to license the passive node of SQL Server 2005 in an Active-Passive clustered environment.

Further Considerations

You must understand that no matter which architectural plan is used in your environment, you still end up with only a single instance of your data and the SQL Server objects (logins, Agent jobs, etc.) on your clustered instance.  Unlike other forms of high-availability design afforded to the Microsoft SQL Server DBA, clustering only provides the means to ensure you are protected from server hardware failure.  It is strongly recommended that you incorporate some form of redundancy into your disaster recovery plan and not rely on clustering alone.  Such options include SAN redundancy products, database mirroring, log shipping, or replication to protect you against media failure on your SAN for which clustering fails to protect.

Demo:

Server manager we can see the Node01 information



Here you can Node01 SQL Server is running it should be active node and owner node also shows as Node01



you can see Node 01 and Node 02 info here


Disk info here


Node 01 SQL Server is running we can see from Configuration manager



here it passive node that is NODE02


On Node01 Launch SQL setup file


Now you can see Maintenance on left side


Now click Remove Node from SQL Server failover cluster


Now show validation status


it shows name of this node



Now click Remove


Node Removal is in progress


SQL Node 01 SQL Server is removed


Repeat the same steps on Passive Node Node 02

we can confirm SQL service status both node






Launch failover cluster manager


Now we can right click on role and remove it from cluster










Wednesday 29 November 2017

To generate SQL Server Health Check Report via Database Mail in SQL

To generate SQL Server Health Check Report via Database Mail in SQL


Step 1:

we need to create following tables on SQL Server db

CREATE TABLE [dbo].[backup_healthcheck](
[server_name] [varchar](100) NOT NULL,
[database_name] [varchar](100) NOT NULL,
[LastFullBackup] [datetime] NULL,
[LastDifferential] [datetime] NULL,
[LastLog] [datetime] NULL
)



CREATE TABLE [dbo].[DB_status](
[server_name] [varchar](100) NOT NULL,
[name] [varchar](100) NULL,
[state_desc] [varchar](50) NULL



CREATE TABLE [dbo].[disk_details](
[server_name] [varchar](100) NOT NULL,
[Disk] [varchar](100) NOT NULL,
[FreeSpaceInGB] [varchar](100) NULL,
[TotalSpaceInGB] [varchar](100) NULL,
[FreeSpaceInPct] [varchar](100) NULL
)



CREATE TABLE [dbo].[Job_Details](
[Server_Name] [varchar](100) NOT NULL,
[Job_Name] [varchar](300) NOT NULL,
[Category] [varchar](400) NULL,
[Job_enabled] [varchar](10) NOT NULL,
[last_run_date] [varchar](100) NULL,
[last_run_status] [varchar](200) NULL
)





Step2:

Need to create Database  DB_Admin on SQL Server

Step 3:

Need to create SP from following script on DB_Admin db

CREATE  procedure [dbo].[usp_mon_Health_status_of_all_servers]      
      
       
      
as      
      
       
      
begin      
      
       
      
declare @sql nvarchar(4000)      
      
       
      
declare @dbs nvarchar(4000)      
      
       
      
declare @jobs nvarchar(4000)      
      
       
      
declare @disk nvarchar(4000)      
      
       
      
declare @return_code int      
      
       
      
declare @last_backup_date datetime      
      
       
      
       
      
declare @server_name sysname      
      
       
      
       
      
       
      
declare servers_cursor cursor for      
      
       
      
       
      
select srvname from master.dbo.sysservers where srvname not in ('repl_distributor','LOGSHIPLINK_USER-PC\MSSQL2012DESTSQL_-264832719')--('USER-PC\MSSQL2012SOURCE')      
      
      
order by srvname      
      
       
      
       
      
delete from backup_healthcheck      
      
       
      
       
      
delete from db_status      
      
       
      
       
      
delete from Job_Details      
      
       
      
delete from disk_details      
      
       
      
       
      
open servers_cursor      
      
       
      
fetch servers_cursor into @server_name      
      
       
      
while @@fetch_status = 0      
      
       
      
begin      
      
       
      
        set @sql = ''      
      
       
      
                   set @sql = 'insert into backup_healthcheck      
      
       
      
                   SELECT server_name = '''  + @server_name + ''',sdb.name,      
      
       
      
       
      
    MAX(CASE WHEN backupset.type = ''D'' THEN backupset.backup_finish_date ELSE NULL END) AS LastFullBackup,      
      
       
      
  MAX(CASE WHEN backupset.type = ''I'' THEN backupset.backup_finish_date ELSE NULL END) AS LastDifferential,      
      
    MAX(CASE WHEN backupset.type = ''L'' THEN backupset.backup_finish_date ELSE NULL END) AS LastLog      
      
                FROM [' + @server_name + '].master.sys.sysdatabases sdb      
      
       
      
LEFT OUTER JOIN [' + @server_name + '].msdb.dbo.backupset ON backupset.database_name = sdb.Name      
      
       
      
       
      
where sdb.name not in (''tempdb'',''ReportServer'',''ReportServerTempDB'')      
      
       
      
GROUP BY sdb.name      
      
       
      
ORDER BY sdb.name DESC'      
      
       
      
--print @sql      
      
        exec sp_executesql @sql      
      
       
      
       
      
--set @disk = ''      
      
--set @disk = 'insert into disk_details select distinct server_name = '''  + @server_name + ''',dovs.volume_mount_point AS Drive,      
      
--CONVERT(decimal,dovs.available_bytes/1048576/1024) AS FreeSpaceInGB,      
      
--convert(decimal,dovs.total_bytes/1048576/1024) as TotalSpaceInGB,      
      
--cast(CONVERT(decimal,dovs.available_bytes/1048576/1024)/convert(decimal,dovs.total_bytes/1048576/1024)*100 as decimal(38,2)) as FreeSpaceInPct      
      
--FROM [' + @server_name + '].master.sys.master_files mf      
      
--CROSS APPLY sys.dm_os_volume_stats(mf.database_id, mf.FILE_ID) dovs'      
      
       
      
--        exec sp_executesql @disk      
      
       
      
       
      
set @dbs = ''      
      
       
      
set @dbs = 'insert into db_status select server_name = '''  + @server_name + ''',name,state_desc,getdate()       
      
       
      
       
      
       
      
FROM [' + @server_name + '].master.sys.databases order by name'      
      
       
      
--print @dbs      
      
       
      
       
      
       
      
exec sp_executesql @dbs      
      
       
      
       
      
       
      
set @jobs = ''      
      
       
      
set @jobs = 'insert into job_Details select server_name = '''  + @server_name +''',sj.name as job_name, sc.name as Category, sj.Enabled,      
      
       
      
        (SELECT top 1 endTime = CONVERT      
      
       
      
     ( DATETIME, RTRIM(run_date)) + (  run_time * 9 + run_time % 10000 * 6 + run_time % 100 * 10 + 25 * run_duration) / 216e4      
      
       
      
FROM [' + @server_name + '].msdb.dbo.sysjobhistory sjh where sjh.job_id = sj.job_id order by run_date desc, run_time desc) as last_run_date,      
      
       
      
CASE sjs.last_run_outcome      
      
       
      
WHEN 0 THEN ''Failed''      
      
       
      
WHEN 1 THEN ''Successful''      
      
       
      
WHEN 3 THEN ''Cancelled''      
      
       
      
WHEN 4 THEN ''In Progress''      
      
       
      
END AS LastRunStatus      
      
       
      
from [' + @server_name + '].msdb.dbo.sysjobs sj      
      
       
      
join [' + @server_name + '].msdb.dbo.syscategories sc      
      
       
      
        on sj.category_id = sc.category_id      
      
       
      
join [' + @server_name + '].msdb.dbo.sysjobservers sjs      
      
       
      
        on sjs.job_id = sj.job_id'      
      
       
exec sp_executesql @jobs      
      
  fetch servers_cursor into @server_name      
       
      
--print @jobs      
      
       
      
            
      
      
      
   Declare @emailSubject VARCHAR(100) ,@Body varchar(100),@sub varchar(1000)      
      
       
      
         set @sub='SQL Server Health Check Report for SQL Server'       
      
      
DECLARE @tableA_HTML  NVARCHAR(MAX) ;      
DECLARE @tableB_HTML  NVARCHAR(MAX) ;      
DECLARE @tableC_HTML  NVARCHAR(MAX) ;      
      
DECLARE @HTML  NVARCHAR(MAX) ;      
      
SET @tableA_HTML =      
    N'<H1>DB_status:</H1>' +      
    N'<table border="1">' +      
    N'<tr><th>server_name</th><th>name</th><th>state_desc</th><th>Status_date</th>     </tr>' +      
    CAST ( ( SELECT td =  server_name , '',      
                    td =  name , '',      
                    td =  state_desc, '',      
     td =   Status_date , ''       
              FROM DB_Admin.dbo.DB_status           
              FOR XML PATH('tr'), TYPE       
    ) AS NVARCHAR(MAX) ) +      
    N'</table>' ;      
      
       
SET @tableB_HTML =      
    N'<H1>Job_Details:</H1>' +      
    N'<table border="1">' +      
    N'<tr><th>Server_Name</th><th>Job_Name</th><th>Category</th><th>Job_enabled</th>  <th>last_run_date</th>       
 <th>last_run_status</th>  </tr>' +      
    CAST ( ( SELECT td =  Server_Name , '',      
                    td =  Job_Name , '',      
                    td =  Category, '',      
     td =  Job_enabled , '' ,      
     td =  last_run_date , '' ,      
     td =  last_run_status , ''       
              FROM DB_Admin.dbo.Job_Details           
              FOR XML PATH('tr'), TYPE       
    ) AS NVARCHAR(MAX) ) +      
    N'</table>' ;      
      
       
SET @tableC_HTML =      
    N'<H1>backup_healthcheck:</H1>' +      
    N'<table border="1">' +      
    N'<tr><th>server_name</th><th>database_name</th><th>LastFullBackup</th><th>LastDifferential</th>        
    <th>LastLog</th>       
       </tr>' +      
    CAST ( ( SELECT td =  server_name , '',      
                    td =  database_name , '',      
                    td =  LastFullBackup, '',      
     td =   LastDifferential , '' ,      
     td =   LastLog , ''       
              FROM DB_Admin.dbo.backup_healthcheck           
              FOR XML PATH('tr'), TYPE       
    ) AS NVARCHAR(MAX) ) +      
    N'</table>' ;      
      
      
      
      
SET @HTML = @tableA_HTML + ' <br/> ' +  @tableB_HTML + ' <br/> ' +  @tableC_HTML + '' ; -- Line Break      
SET @HTML = REPLACE(@HTML,'<th>','<th  bgcolor="Orange"/>' ); -- For BackGround Color       
      
      
      
   
      
      
      
end      
      
       
      
close servers_cursor      
      
       
      
       
      
deallocate servers_cursor      
      
       
 EXEC msdb.dbo.sp_send_dbmail      
      
    @recipients='servername@servermail.com;',      
      
    @profile_name = 'dbmailProfile',      
      
    @subject = @sub,      
      
    @body = @HTML,      
      
       @body_format = 'HTML'      
       
      
end      





Step 4:

Need to create SQL  Jobs

USE [msdb]
GO

/****** Object:  Job [DB_server_health_job]   
BEGIN TRANSACTION
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
/****** Object:  JobCategory [[Uncategorized (Local)]]    Script Date: 29-11-2017 18:01:30 ******/
IF NOT EXISTS (SELECT name FROM msdb.dbo.syscategories WHERE name=N'[Uncategorized (Local)]' AND category_class=1)
BEGIN
EXEC @ReturnCode = msdb.dbo.sp_add_category @class=N'JOB', @type=N'LOCAL', @name=N'[Uncategorized (Local)]'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback

END

DECLARE @jobId BINARY(16)
EXEC @ReturnCode =  msdb.dbo.sp_add_job @job_name=N'DB_server_health_job',
@enabled=1,
@notify_level_eventlog=0,
@notify_level_email=0,
@notify_level_netsend=0,
@notify_level_page=0,
@delete_level=0,
@description=N'No description available.',
@category_name=N'[Uncategorized (Local)]',
@owner_login_name=N'USER-PC\Administrator', @job_id = @jobId OUTPUT
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
/****** Object:  Step [Step 1]    Script Date: 29-11-2017 18:01:30 ******/
EXEC @ReturnCode = msdb.dbo.sp_add_jobstep @job_id=@jobId, @step_name=N'Step 1',
@step_id=1,
@cmdexec_success_code=0,
@on_success_action=1,
@on_success_step_id=0,
@on_fail_action=2,
@on_fail_step_id=0,
@retry_attempts=0,
@retry_interval=0,
@os_run_priority=0, @subsystem=N'TSQL',
@command=N'exec db_admin.dbo.usp_mon_Health_status_of_all_servers',
@database_name=N'DB_Admin',
@flags=0
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_update_job @job_id = @jobId, @start_step_id = 1
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id=@jobId, @name=N'Once a day',
@enabled=1,
@freq_type=4,
@freq_interval=1,
@freq_subday_type=1,
@freq_subday_interval=0,
@freq_relative_interval=0,
@freq_recurrence_factor=0,
@active_start_date=20170911,
@active_end_date=99991231,
@active_start_time=115900,
@active_end_time=235959,
@schedule_uid=N'351f1f71-4248-400e-8a7f-60e5d2ea98b1'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @jobId, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
COMMIT TRANSACTION
GOTO EndSave
QuitWithRollback:
    IF (@@TRANCOUNT > 0) ROLLBACK TRANSACTION
EndSave:

GO



Step 5:

Need to Create linked server and configure Database Mail



Step 6:

Run the job to get the report